Commit 5ea973b1 authored by Navtej Sadhal, committed by Dan Fabulich
Browse files

Changing url-check so that it allows different schemes but still requires the same domain



(cherry picked from commit d4ff78918dd9317a5686b9675cdade3cb6c2007e)
Signed-off-by: Dan Fabulich <dan.fabulich@redfin.com>
parent 95b23f2e
Loading
Loading
Loading
Loading
+3 −2
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@ import java.net.URL;
// It makes sense, I swear! http://madbean.com/2004/mb2004-3/
abstract class AbstractSitemapGeneratorOptions<THIS extends AbstractSitemapGeneratorOptions<THIS>> {
	File baseDir;
	String baseUrl;
	URL baseUrl;
	String fileNamePrefix = "sitemap";
	boolean allowMultipleSitemaps = true;
	W3CDateFormat dateFormat;
@@ -19,7 +19,7 @@ abstract class AbstractSitemapGeneratorOptions<THIS extends AbstractSitemapGener
		if (baseDir == null) throw new NullPointerException("baseDir may not be null");
		if (baseUrl == null) throw new NullPointerException("baseUrl may not be null");
		this.baseDir = baseDir;
		this.baseUrl = baseUrl.toString();
		this.baseUrl = baseUrl;
	}
	
	/** The prefix of the name of the sitemaps we'll create; by default this is "sitemap" */
@@ -62,6 +62,7 @@ abstract class AbstractSitemapGeneratorOptions<THIS extends AbstractSitemapGener
		this.gzip = gzip;
		return getThis();
	}
	
	@SuppressWarnings("unchecked")
	THIS getThis() {
		return (THIS)this;
+4 −8
Original line number Diff line number Diff line
@@ -17,7 +17,7 @@ abstract class SitemapGenerator<U extends ISitemapUrl, THIS extends SitemapGener
	/** 50000 URLs per sitemap maximum */
	public static final int MAX_URLS_PER_SITEMAP = 50000;
	
	private final String baseUrl;
	private final URL baseUrl;
	private final File baseDir;
	private final String fileNamePrefix;
	private final String fileNameSuffix;
@@ -56,7 +56,7 @@ abstract class SitemapGenerator<U extends ISitemapUrl, THIS extends SitemapGener
	 */
	public THIS addUrl(U url) {
		if (finished) throw new RuntimeException("Sitemap already printed; you must create a new generator to make more sitemaps"); 
		UrlUtils.checkUrl(url.getUrl().toString(), baseUrl);
		UrlUtils.checkUrl(url.getUrl(), baseUrl);
		if (urls.size() == maxUrls) {
			if (!allowMultipleSitemaps) throw new RuntimeException("More than " + maxUrls + " urls, but allowMultipleSitemaps is false.  Enable allowMultipleSitemaps to split the sitemap into multiple files with a sitemap index.");
			if (mapCount == 0) mapCount++;
@@ -170,11 +170,7 @@ abstract class SitemapGenerator<U extends ISitemapUrl, THIS extends SitemapGener
		if (!finished) throw new RuntimeException("Sitemaps not generated yet; call write() first");
		File outFile = new File(baseDir, "sitemap_index.xml");
		SitemapIndexGenerator sig;		
		try {
		sig = new SitemapIndexGenerator.Options(baseUrl, outFile).dateFormat(dateFormat).autoValidate(autoValidate).build();		
		} catch (MalformedURLException e) {
			throw new RuntimeException("bug", e);
		}
		sig.addUrls(fileNamePrefix, fileNameSuffix, mapCount).write();
	}
	
+3 −5
Original line number Diff line number Diff line
@@ -18,7 +18,6 @@ import org.xml.sax.SAXException;
 */
public class SitemapIndexGenerator {
	private final URL baseUrl;	
	private final String baseUrlString;
	private final File outFile;
	private final ArrayList<SitemapIndexUrl> urls = new ArrayList<SitemapIndexUrl>();
	private final int maxUrls;
@@ -116,7 +115,6 @@ public class SitemapIndexGenerator {
	
	private SitemapIndexGenerator(Options options) {
		this.baseUrl = options.baseUrl;		
		this.baseUrlString = baseUrl.toString();
		this.outFile = options.outFile;
		this.maxUrls = options.maxUrls;
		W3CDateFormat dateFormat = options.dateFormat;
@@ -128,7 +126,7 @@ public class SitemapIndexGenerator {
	
	/** Adds a single sitemap to the index */
	public SitemapIndexGenerator addUrl(SitemapIndexUrl url) { 
		UrlUtils.checkUrl(url.url.toString(), baseUrlString);
		UrlUtils.checkUrl(url.url, baseUrl);
		if (urls.size() >= maxUrls) {
			throw new RuntimeException("More than " + maxUrls + " urls");
		}
+9 −3
Original line number Diff line number Diff line
package com.redfin.sitemapgenerator;

import java.net.URL;
import java.util.HashMap;

class UrlUtils {

	static void checkUrl(String url, String baseUrl) {
	static void checkUrl(URL url, URL baseUrl) {
		// Is there a better test to use here?
		if (!url.startsWith(baseUrl)) {
			throw new RuntimeException("Url " + url + " doesn't start with base URL " + baseUrl);
		
		if (baseUrl.getHost() == null) {
			throw new RuntimeException("base URL is null");
		}
		
		if (!baseUrl.getHost().equalsIgnoreCase(url.getHost())) {
			throw new RuntimeException("Domain of URL " + url + " doesn't match base URL " + baseUrl);
		}
	}

+15 −0
Original line number Diff line number Diff line
@@ -162,6 +162,21 @@ public class SitemapGeneratorTest extends TestCase {
		} catch (RuntimeException e) {}
	}

	/**
	 * Verifies that a URL whose scheme differs from the base URL's (https vs. http)
	 * but whose host is the same can be added without error, and that it appears
	 * in the generated sitemap exactly as given.
	 */
	public void testSameDomainDifferentSchemeOK() throws Exception {
		// Base URL is plain http; the URL added below is https on the same host.
		wsg = new WebSitemapGenerator("http://www.example.com", dir);
		wsg.addUrl("https://www.example.com/index.html");

		final String actualXml = writeSingleSiteMap(wsg);
		final String expectedXml =
				"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
				+ "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" >\n"
				+ "  <url>\n"
				+ "    <loc>https://www.example.com/index.html</loc>\n"
				+ "  </url>\n"
				+ "</urlset>";
		assertEquals(expectedXml, actualXml);
	}
	
	public void testDoubleWrite() throws Exception {
		testSimpleUrl();
		try {