Commit 56bd6d72 authored by DanFabulich's avatar DanFabulich
Browse files

First checkin

git-svn-id: https://sitemapgen4j.googlecode.com/svn/trunk@2 aa787bee-eda5-11dd-ada0-abde575de245
parent c6542901
Loading
Loading
Loading
Loading

TODO.txt

0 → 100644
+18 −0
Original line number Diff line number Diff line

Ping search engines

Text file reader
Sitemap reader

Improve validator for basic sitemap case (gzip, 10MB, urls, encoding)
Validate Mobile/Geo/Video/Code/News sitemaps


JS api
addUrl({url:"http://www.example.com",lastMod:"2007-08-01"});
new WebSitemapGenerator({});
new SitemapIndexGenerator({});

Google KML generation
GeoRSS generation
Google Code packagemap http://www.google.com/help/codesearch_packagemap.html

pom.xml

0 → 100644
+34 −0
Original line number Diff line number Diff line
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <!-- Maven build descriptor for the SitemapGen4J library; the artifact is packaged as a jar. -->
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.redfin</groupId>
  <artifactId>sitemapgen4j</artifactId>
  <packaging>jar</packaging>
  <version>1.0-SNAPSHOT</version>
  <name>SitemapGen4J</name>
  <build>
    <!-- Goal that runs when mvn is invoked without naming one. -->
    <defaultGoal>install</defaultGoal>
    <plugins>
      <plugin>
        <artifactId>maven-compiler-plugin</artifactId>
        <configuration>
          <!-- Compile against Java 5 language level and emit Java 5 bytecode. -->
          <source>1.5</source>
          <target>1.5</target>
        </configuration>
      </plugin>
      <plugin>
        <!-- Eclipse project-file generation, pinned to a fixed plugin version. -->
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-eclipse-plugin</artifactId>
        <version>2.5.1</version>
      </plugin>
    </plugins>
  </build>
  <dependencies>
    <!-- JUnit is the only declared dependency and is limited to the test classpath. -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>
  </dependencies>
</project>
+69 −0
Original line number Diff line number Diff line
package com.redfin.sitemapgenerator;

import java.io.File;
import java.net.URL;

/**
 * Fluent holder for the settings shared by the sitemap generators: where
 * the files are written, the base URL, the file name prefix, splitting,
 * date formatting, per-file URL limit, validation and gzip flags.
 *
 * <p>That weird thing with generics is so sub-classed objects will return
 * themselves from every setter, which keeps chained calls typed as the
 * concrete subclass. It makes sense, I swear!
 * http://madbean.com/2004/mb2004-3/</p>
 *
 * @param <THIS> the concrete subclass, returned by each fluent setter
 */
abstract class AbstractSitemapGeneratorOptions<THIS extends AbstractSitemapGeneratorOptions<THIS>> {
	File baseDir;
	String baseUrl;
	String fileNamePrefix = "sitemap";
	boolean allowMultipleSitemaps = true;
	/** Left null unless {@link #dateFormat(W3CDateFormat)} is called. */
	W3CDateFormat dateFormat;
	int maxUrls = SitemapGenerator.MAX_URLS_PER_SITEMAP;
	boolean autoValidate = false;
	boolean gzip = false;
	
	/**
	 * @param baseUrl the base URL; stored in its string form
	 * @param baseDir the directory the options refer to for output
	 * @throws NullPointerException if either argument is null
	 */
	public AbstractSitemapGeneratorOptions(URL baseUrl, File baseDir) {
		if (baseDir == null) throw new NullPointerException("baseDir may not be null");
		if (baseUrl == null) throw new NullPointerException("baseUrl may not be null");
		this.baseDir = baseDir;
		this.baseUrl = baseUrl.toString();
	}
	
	/**
	 * The prefix of the name of the sitemaps we'll create; by default this is "sitemap"
	 * @throws NullPointerException if fileNamePrefix is null
	 */
	public THIS fileNamePrefix(String fileNamePrefix) {
		if (fileNamePrefix == null) throw new NullPointerException("fileNamePrefix may not be null");
		this.fileNamePrefix = fileNamePrefix;
		return getThis();
	}
	/** When more than the maximum number of URLs are passed in, should we split into multiple sitemaps automatically, or just throw an exception? Defaults to true. */
	public THIS allowMultipleSitemaps(boolean allowMultipleSitemaps) {
		this.allowMultipleSitemaps = allowMultipleSitemaps;
		return getThis();
	}
	/** The date formatter, typically configured with a {@link W3CDateFormat.Pattern} and/or a time zone */
	public THIS dateFormat(W3CDateFormat dateFormat) {
		this.dateFormat = dateFormat;
		return getThis();
	}
	/**
	 * The maximum number of URLs to allow per sitemap; the default is the
	 * maximum allowed (50,000), but you can decrease it if you wish (to make
	 * your auto-generated sitemaps smaller)
	 *
	 * @throws IllegalArgumentException if maxUrls is less than 1
	 * @throws RuntimeException if maxUrls exceeds the per-sitemap maximum
	 */
	public THIS maxUrls(int maxUrls) {
		// A sitemap that may hold zero (or a negative number of) URLs can never accept an entry,
		// so reject the value here instead of letting it surface later during generation.
		if (maxUrls < 1) {
			throw new IllegalArgumentException("maxUrls must be at least 1; you asked for " + maxUrls);
		}
		if (maxUrls > SitemapGenerator.MAX_URLS_PER_SITEMAP) {
			throw new RuntimeException("You can only have 50,000 URLs per sitemap; to use more, allowMultipleSitemaps and generate a sitemap index. You asked for " + maxUrls);
		}
		this.maxUrls = maxUrls;
		return getThis();
	}
	/**
	 * Validate the sitemaps automatically after writing them; this takes time (and may fail for Google-specific sitemaps)
	 */
	public THIS autoValidate(boolean autoValidate) {
		this.autoValidate = autoValidate;
		return getThis();
	}
	/** Gzip the sitemaps after they are written to disk. Unlike the other setters this one is package-private. */
	THIS gzip(boolean gzip) {
		this.gzip = gzip;
		return getThis();
	}
	/** Returns this instance typed as the concrete subclass, so setters can be chained. */
	@SuppressWarnings("unchecked")
	public THIS getThis() {
		// Safe as long as every subclass binds THIS to itself, as the class's type bound intends.
		return (THIS)this;
	}
}
 No newline at end of file
+112 −0
Original line number Diff line number Diff line
package com.redfin.sitemapgenerator;

import java.net.MalformedURLException;
import java.net.URL;
import java.text.ParseException;
import java.util.Date;

/**
 * Container for optional URL parameters.
 *
 * <p>That weird thing with generics is so sub-classed objects will return
 * themselves from every setter. It makes sense, I swear!
 * http://madbean.com/2004/mb2004-3/</p>
 *
 * @param <U> the URL type produced by {@link #build()}
 * @param <THIS> the concrete options subclass, returned by each fluent setter
 */
abstract class AbstractSitemapUrlOptions<U extends WebSitemapUrl, THIS extends AbstractSitemapUrlOptions<U,THIS>> {
	Date lastMod;
	ChangeFreq changeFreq;
	Double priority;
	URL url;
	Class<U> clazz;
	
	/**
	 * @param url the URL, as a string
	 * @param clazz the URL class that {@link #build()} instantiates
	 * @throws MalformedURLException if the string cannot be parsed as a URL
	 */
	public AbstractSitemapUrlOptions(String url, Class<U> clazz) throws MalformedURLException {
		this(new URL(url), clazz);
	}
	
	/**
	 * @param url the URL
	 * @param clazz the URL class that {@link #build()} instantiates
	 * @throws NullPointerException if url is null
	 */
	public AbstractSitemapUrlOptions(URL url, Class<U> clazz) {
		if (url == null) throw new NullPointerException("URL may not be null");
		this.url = url;
		this.clazz = clazz;
	}
	
	/**
	 * The date of last modification of the file. Note that this tag is
	 * separate from the If-Modified-Since (304) header the server can
	 * return, and search engines may use the information from both sources
	 * differently.
	 */
	public THIS lastMod(Date lastMod) {
		this.lastMod = lastMod;
		return getThis();
	}
	
	/**
	 * The date of last modification of the file. Note that this tag is
	 * separate from the If-Modified-Since (304) header the server can
	 * return, and search engines may use the information from both sources
	 * differently.
	 * @throws ParseException if the string isn't a valid W3C date time
	 * @see W3CDateFormat
	 */
	public THIS lastMod(String lastMod) throws ParseException {
		this.lastMod = new W3CDateFormat().parse(lastMod);
		return getThis();
	}
	
	/**
	 * How frequently the page is likely to change. This value provides
	 * general information to search engines and may not correlate exactly
	 * to how often they crawl the page. The value {@link ChangeFreq#ALWAYS} should be used to
	 * describe documents that change each time they are accessed. The value
	 * {@link ChangeFreq#NEVER} should be used to describe archived URLs.
	 * 
	 * <p>Please note that the
	 * value of this tag is considered a <em>hint</em> and not a command. Even though
	 * search engine crawlers may consider this information when making
	 * decisions, they may crawl pages marked {@link ChangeFreq#HOURLY} less frequently than
	 * that, and they may crawl pages marked {@link ChangeFreq#YEARLY} more frequently than
	 * that. Crawlers may periodically crawl pages marked {@link ChangeFreq#NEVER} so that
	 * they can handle unexpected changes to those pages.</p>
	 */
	public THIS changeFreq(ChangeFreq changeFreq) {
		this.changeFreq = changeFreq;
		return getThis();
	}
	
	/**
	 * <p>The priority of this URL relative to other URLs on your site. Valid
	 * values range from 0.0 to 1.0. This value does not affect how your
	 * pages are compared to pages on other sites—it only lets the search
	 * engines know which pages you deem most important for the crawlers.</p>
	 * 
	 * <p>The default priority of a page is 0.5.</p>
	 * 
	 * <p>Please note that the priority you assign to a page is not likely to
	 * influence the position of your URLs in a search engine's result
	 * pages. Search engines may use this information when selecting between
	 * URLs on the same site, so you can use this tag to increase the
	 * likelihood that your most important pages are present in a search
	 * index.</p>
	 * 
	 * <p>Also, please note that assigning a high priority to all of the URLs
	 * on your site is not likely to help you. Since the priority is
	 * relative, it is only used to select between URLs on your site.</p>
	 * 
	 * @param priority a value from 0.0 to 1.0 inclusive, or null to leave the priority unset
	 * @throws IllegalArgumentException if priority is NaN or outside 0.0 to 1.0
	 */
	public THIS priority(Double priority) {
		// null means "no priority", like the other optional setters; without this guard the
		// comparisons below would auto-unbox null and fail with an unexplained NullPointerException.
		if (priority != null) {
			// NaN compares false against both bounds, so it has to be rejected explicitly.
			if (priority.isNaN()) throw new IllegalArgumentException("Priority must be a number: " + priority);
			if (priority > 1.0) throw new IllegalArgumentException("Priority may not be greater than 1.0: " + priority);
			if (priority < 0) throw new IllegalArgumentException("Priority may not be less than 0: " + priority);
		}
		this.priority = priority;
		return getThis();
	}
	
	/** Returns this instance typed as the concrete subclass, so setters can be chained. */
	@SuppressWarnings("unchecked")
	THIS getThis() {
		return (THIS)this;
	}
	
	/**
	 * Return an URL based on these settings.
	 *
	 * <p>Looks up, reflectively, the public constructor of the URL class whose
	 * single parameter is the runtime class of this options object, and
	 * invokes it with this instance.</p>
	 *
	 * @throws RuntimeException wrapping any failure to find or invoke that constructor
	 */
	public U build() {
		try {
			return clazz.getConstructor(getClass()).newInstance(this);
		} catch (Exception e) {
			throw new RuntimeException(e);
		}
	}
	
}
 No newline at end of file
+48 −0
Original line number Diff line number Diff line
package com.redfin.sitemapgenerator;

import java.io.IOException;
import java.io.OutputStreamWriter;

/**
 * Shared rendering logic for sitemap URL entries: writes the standard
 * <code>&lt;url&gt;</code> element and offers a helper for building
 * namespaced extension tags.
 *
 * @param <T> the URL type handled by the concrete renderer
 */
abstract class AbstractSitemapUrlRenderer<T extends WebSitemapUrl> implements ISitemapUrlRenderer<T> {

	/**
	 * Writes one <code>&lt;url&gt;</code> element for the given URL. The
	 * <code>lastmod</code>, <code>changefreq</code> and <code>priority</code>
	 * children are emitted only when the corresponding value is non-null.
	 *
	 * @param url the URL to render; its location is XML-escaped before being written
	 * @param out the destination writer
	 * @param dateFormat used to format the last-modified date when one is present
	 * @param additionalData text written verbatim before the closing tag; skipped when null
	 * @throws IOException if writing to <code>out</code> fails
	 */
	public void render(WebSitemapUrl url, OutputStreamWriter out, W3CDateFormat dateFormat, String additionalData)
			throws IOException {
		out.write("  <url>\n");
		out.write("    <loc>");
		// Query strings routinely contain '&', which would make the document malformed if written raw.
		out.write(escapeXml(url.getUrl().toString()));
		out.write("</loc>\n");
		if (url.getLastMod() != null) {
			out.write("    <lastmod>");
			out.write(dateFormat.format(url.getLastMod()));
			out.write("</lastmod>\n");
		}
		if (url.getChangeFreq() != null) {
			out.write("    <changefreq>");
			out.write(url.getChangeFreq().toString());
			out.write("</changefreq>\n");
		}
		if (url.getPriority() != null) {
			out.write("    <priority>");
			out.write(url.getPriority().toString());
			out.write("</priority>\n");
		}
		// additionalData is expected to be markup already, so it is not escaped here.
		if (additionalData != null) out.write(additionalData);
		out.write("  </url>\n");
	}

	/**
	 * Appends <code>&lt;namespace:tagName&gt;value&lt;/namespace:tagName&gt;</code>
	 * followed by a newline to the builder, or nothing at all when the value is null.
	 *
	 * @param sb the builder to append to
	 * @param namespace the namespace prefix of the tag
	 * @param tagName the local name of the tag
	 * @param value the tag content; its string form is XML-escaped
	 */
	public void renderTag(StringBuilder sb, String namespace, String tagName, Object value) {
		if (value == null) return;
		sb.append("      <");
		sb.append(namespace);
		sb.append(':');
		sb.append(tagName);
		sb.append('>');
		sb.append(escapeXml(String.valueOf(value)));
		sb.append("</");
		sb.append(namespace);
		sb.append(':');
		sb.append(tagName);
		sb.append(">\n");
	}

	/** Replaces the five XML special characters with their predefined entity references. */
	private static String escapeXml(String raw) {
		StringBuilder escaped = new StringBuilder(raw.length() + 16);
		for (int i = 0; i < raw.length(); i++) {
			char c = raw.charAt(i);
			switch (c) {
			case '&':
				escaped.append("&amp;");
				break;
			case '<':
				escaped.append("&lt;");
				break;
			case '>':
				escaped.append("&gt;");
				break;
			case '"':
				escaped.append("&quot;");
				break;
			case '\'':
				escaped.append("&apos;");
				break;
			default:
				escaped.append(c);
			}
		}
		return escaped.toString();
	}

}
Loading