Skip to content

Commit

Permalink
feat: add sitemap entry and options
Browse files Browse the repository at this point in the history
  • Loading branch information
guqing committed Nov 12, 2022
1 parent 062bd4a commit 9303388
Show file tree
Hide file tree
Showing 4 changed files with 258 additions and 0 deletions.
26 changes: 26 additions & 0 deletions src/main/java/run/halo/sitemap/SitemapEntry.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package run.halo.sitemap;

import lombok.Builder;
import lombok.Data;
import lombok.NonNull;

/**
* A single URL entry of a sitemap. The fields are named after the child tags of a
* {@code <url>} element in the sitemap protocol.
*
* <p>Accessors, {@code equals}/{@code hashCode}/{@code toString} and the builder are
* generated by Lombok ({@code @Data}, {@code @Builder}).</p>
*
* @author guqing
* @since 1.0.0
*/
@Data
@Builder
public class SitemapEntry {
/**
* <p>URL of the page, i.e. the value of the {@code <loc>} tag.</p>
* Required: the field is marked {@code @NonNull}, so the Lombok-generated code rejects
* {@code null}.
*/
@NonNull
private String loc;

/**
* Last modification date of the page as an already formatted string (value of the
* {@code <lastmod>} tag). Optional, may be {@code null}.
*/
private String lastmod;

/**
* Expected change frequency of the page (value of the {@code <changefreq>} tag).
* Optional, may be {@code null}.
*/
private ChangeFreqEnum changefreq;

/**
* Priority of this URL relative to other URLs of the site (value of the
* {@code <priority>} tag). Optional, may be {@code null}. The sitemap protocol defines the
* valid range as 0.0 to 1.0; the value is not validated here.
*/
private Double priority;
}
113 changes: 113 additions & 0 deletions src/main/java/run/halo/sitemap/SitemapGeneratorOptions.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
package run.halo.sitemap;

import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.time.Instant;
import java.time.format.DateTimeFormatter;
import java.util.Set;
import lombok.Builder;
import lombok.Data;
import lombok.NonNull;
import org.apache.commons.lang3.StringUtils;
import org.springframework.web.util.UriUtils;
import run.halo.app.infra.utils.PathUtils;

/**
 * Sitemap generator options.
 *
 * <p>Instances are created through the Lombok-generated builder. Every option except
 * {@code siteUrl} (required) and {@code exclude} (optional, {@code null} when unset) has a
 * default value. Within this class only {@code siteUrl}, {@code changefreq},
 * {@code priority}, {@code autoLastmod} and {@code dateTimeFormatter} are read (by
 * {@link #transform(String)}); the remaining options are only stored here.</p>
 *
 * @author guqing
 * @since 1.0.0
 */
@Data
@Builder
public class SitemapGeneratorOptions {
    /**
     * All URLs in the generated sitemap(s) should appear under this base URL. Required.
     */
    @NonNull
    private URL siteUrl;

    /**
     * File name prefix. Default {@code "sitemap"}.
     */
    @Builder.Default
    private String fileNamePrefix = "sitemap";

    /**
     * Default {@code false}.
     */
    @Builder.Default
    private boolean allowEmptySitemap = false;

    /**
     * Default {@code true}.
     */
    @Builder.Default
    private boolean allowMultipleSitemaps = true;

    /**
     * Formatter used for the {@code lastmod} value produced by {@link #transform(String)}.
     * Default {@link W3cDatetimeFormat#MILLISECOND_FORMATTER}. When set to {@code null},
     * no {@code lastmod} is generated.
     */
    @Builder.Default
    private DateTimeFormatter dateTimeFormatter = W3cDatetimeFormat.MILLISECOND_FORMATTER;

    /**
     * Split large sitemap into multiple files by specifying sitemap size. Default 5000.
     */
    @Builder.Default
    private int sitemapSize = 5000;

    /**
     * Default {@code false}.
     */
    @Builder.Default
    private boolean autoValidate = false;

    /**
     * Default {@code false}.
     */
    @Builder.Default
    private boolean gzip = false;

    /**
     * Change frequency applied to every entry built by {@link #transform(String)}.
     * Default {@code ChangeFreqEnum.DAILY}.
     */
    @Builder.Default
    private ChangeFreqEnum changefreq = ChangeFreqEnum.DAILY;

    /**
     * Priority applied to every entry built by {@link #transform(String)}. Default 0.7.
     */
    @Builder.Default
    private double priority = 0.7;

    /**
     * Add &lt;lastmod/&gt; property. Default true
     */
    @Builder.Default
    private boolean autoLastmod = true;

    /**
     * <p>Relative paths (wildcard pattern supported) to exclude from listing on sitemap.xml
     * or sitemap-*.xml.</p>
     *
     * <p>e.g.: ['/page-0', '/page-*', '/private/*'].</p>
     *
     * <p>The option is modelled on the {@code exclude} option of next-sitemap. No default is
     * configured, so the value is {@code null} unless it is set explicitly.</p>
     */
    private Set<String> exclude;

    /**
     * Generate index sitemaps. Default true.
     */
    @Builder.Default
    private boolean generateIndexSitemap = true;

    /**
     * Builds the sitemap entry for a page URL.
     *
     * <p>The URL is entity-escaped exactly once, normalized and, when it is not absolute,
     * resolved against {@code siteUrl}. The entry receives the configured {@code changefreq}
     * and {@code priority}; {@code lastmod} is set to the current time when
     * {@code autoLastmod} is enabled and a {@code dateTimeFormatter} is configured.</p>
     *
     * @param url absolute or site-relative URL of the page
     * @return the sitemap entry, or {@code null} when {@code url} is blank
     * @throws RuntimeException if {@code url} or {@code siteUrl} is not a valid URI
     */
    public SitemapEntry transform(String url) {
        if (StringUtils.isBlank(url)) {
            return null;
        }
        // UrlUtils.toURI escapes its argument itself. Escaping here as well would turn
        // '&' into '&amp;amp;' for absolute URLs while relative URLs stayed single-escaped.
        URI uri = UrlUtils.toURI(url);
        String loc = uri.normalize().toASCIIString();
        if (!PathUtils.isAbsoluteUri(loc)) {
            loc = getSiteUri().resolve(uri).normalize().toASCIIString();
        }

        SitemapEntry.SitemapEntryBuilder builder = SitemapEntry.builder()
            .loc(loc)
            .changefreq(changefreq)
            .priority(priority);

        if (dateTimeFormatter != null && autoLastmod) {
            builder.lastmod(W3cDatetimeFormat.format(Instant.now(), dateTimeFormatter));
        }
        return builder.build();
    }

    /**
     * Converts {@code siteUrl} to a {@link URI} so that relative URLs can be resolved
     * against it.
     *
     * @return the site URL as URI
     * @throws IllegalStateException if the configured site URL is not a valid URI
     */
    private URI getSiteUri() {
        try {
            return siteUrl.toURI();
        } catch (URISyntaxException e) {
            throw new IllegalStateException("The siteUrl is not a valid URI: " + siteUrl, e);
        }
    }
}
40 changes: 40 additions & 0 deletions src/main/java/run/halo/sitemap/UrlUtils.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package run.halo.sitemap;

import java.net.URI;
import java.net.URISyntaxException;
import lombok.experimental.UtilityClass;
import org.springframework.util.Assert;

/**
 * URL helpers used when building sitemap entries.
 *
 * @author guqing
 * @since 1.0.0
 */
@UtilityClass
public final class UrlUtils {

    /**
     * Replaces the XML special characters of a URL with entity escape codes.
     *
     * <p>Your Sitemap file must be UTF-8 encoded (you can generally do this when you save the
     * file).</p>
     * As with all XML files, any data values (including URLs) must use entity escape codes for
     * the following characters: {@code &} becomes {@code &amp;}, {@code '} becomes
     * {@code &apos;}, {@code "} becomes {@code &quot;}, {@code >} becomes {@code &gt;} and
     * {@code <} becomes {@code &lt;}.
     *
     * <p>The method is not idempotent: input that is already escaped is escaped again
     * ({@code &amp;} becomes {@code &amp;amp;}), so call it only once per value.</p>
     *
     * @param url the raw URL, must not be {@code null}
     * @return the escaped URL
     * @throws IllegalArgumentException if {@code url} is {@code null}
     */
    public static String escapeSitemapUrl(String url) {
        Assert.notNull(url, "The url must not be null");
        // Literal replacement is sufficient, no regex is needed. The ampersand has to be
        // handled first, otherwise the '&' of the other entities would be escaped again.
        return url.replace("&", "&amp;")
            .replace("'", "&apos;")
            .replace("\"", "&quot;")
            .replace(">", "&gt;")
            .replace("<", "&lt;");
    }

    /**
     * Escapes the given string with {@link #escapeSitemapUrl(String)} and parses the result
     * as a {@link URI}. Pass the raw, unescaped value; escaping is done here.
     *
     * @param s the raw URL, may be {@code null}
     * @return the parsed URI, or {@code null} when {@code s} is {@code null}
     * @throws IllegalArgumentException if the escaped string is not a valid URI
     */
    public static URI toURI(String s) {
        if (s == null) {
            return null;
        }
        try {
            return new URI(escapeSitemapUrl(s));
        } catch (URISyntaxException e) {
            throw new IllegalArgumentException("Invalid URL: " + s, e);
        }
    }
}
79 changes: 79 additions & 0 deletions src/main/java/run/halo/sitemap/W3cDatetimeFormat.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
package run.halo.sitemap;

import java.time.Instant;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import lombok.experimental.UtilityClass;

/**
 * Formatters for the W3C datetime profile of ISO 8601.
 *
 * <p>Different standards may need different levels of granularity in the date and time, so
 * this profile defines six levels. Standards that reference this profile should specify one
 * or more of these granularities. If a given standard allows more than one granularity, it
 * should specify the meaning of the dates and times with reduced precision, for example, the
 * result of comparing two dates with different precisions.</p>
 *
 * <p>The formats are as follows. Exactly the components shown here must be present, with
 * exactly this punctuation. Note that the "T" appears literally in the string, to indicate
 * the beginning of the time element, as specified in ISO 8601.</p>
 *
 * <pre>
 * Year:
 *     YYYY (eg 1997)
 * Year and month:
 *     YYYY-MM (eg 1997-07)
 * Complete date:
 *     YYYY-MM-DD (eg 1997-07-16)
 * Complete date plus hours and minutes:
 *     YYYY-MM-DDThh:mmTZD (eg 1997-07-16T19:20+01:00)
 * Complete date plus hours, minutes and seconds:
 *     YYYY-MM-DDThh:mm:ssTZD (eg 1997-07-16T19:20:30+01:00)
 * Complete date plus hours, minutes, seconds and a decimal fraction of a second:
 *     YYYY-MM-DDThh:mm:ss.sTZD (eg 1997-07-16T19:20:30.45+01:00)
 * </pre>
 *
 * <p>where:</p>
 *
 * <pre>
 * YYYY = four-digit year
 * MM   = two-digit month (01=January, etc.)
 * DD   = two-digit day of month (01 through 31)
 * hh   = two digits of hour (00 through 23) (am/pm NOT allowed)
 * mm   = two digits of minute (00 through 59)
 * ss   = two digits of second (00 through 59)
 * s    = one or more digits representing a decimal fraction of a second
 * TZD  = time zone designator (Z or +hh:mm or -hh:mm)
 * </pre>
 *
 * <p>This profile does not specify how many digits may be used to represent the decimal
 * fraction of a second. An adopting standard that permits fractions of a second must specify
 * both the minimum number of digits (a number greater than or equal to one) and the maximum
 * number of digits (the maximum may be stated to be "unlimited").</p>
 *
 * <p>This profile defines two ways of handling time zone offsets:</p>
 * <ul>
 * <li>Times are expressed in UTC (Coordinated Universal Time), with a special UTC designator
 * ("Z").</li>
 * <li>Times are expressed in local time, together with a time zone offset in hours and
 * minutes. A time zone offset of "+hh:mm" indicates that the date/time uses a local time
 * zone which is "hh" hours and "mm" minutes ahead of UTC. A time zone offset of "-hh:mm"
 * indicates that the date/time uses a local time zone which is "hh" hours and "mm" minutes
 * behind UTC.</li>
 * </ul>
 * A standard referencing this profile should permit one or both of these ways of handling
 * time zone offsets.
 *
 * @author guqing
 * @since 1.0.0
 */
@UtilityClass
public final class W3cDatetimeFormat {
    // The offset is written with the pattern "XXX", which prints "Z" for UTC and "+hh:mm" or
    // "-hh:mm" otherwise, as the TZD component requires. The pattern letter "Z" would print
    // "+hhmm" without the colon, which is not a valid W3C datetime.

    /**
     * Complete date plus hours, minutes, seconds and milliseconds,
     * e.g. {@code 1997-07-16T19:20:30.450+01:00}.
     */
    public static final DateTimeFormatter MILLISECOND_FORMATTER =
        DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSXXX");

    /**
     * Complete date plus hours, minutes and seconds, e.g. {@code 1997-07-16T19:20:30+01:00}.
     */
    public static final DateTimeFormatter SECOND_FORMATTER =
        DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ssXXX");

    /**
     * Complete date plus hours and minutes, e.g. {@code 1997-07-16T19:20+01:00}.
     */
    public static final DateTimeFormatter MINUTE_FORMATTER =
        DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mmXXX");

    /**
     * Formats an instant in the system default time zone with the given formatter. Any zone
     * configured on the formatter is replaced by the system default zone.
     *
     * @param instant the point in time to format
     * @param formatter the formatter to use
     * @return the formatted date-time string
     */
    public static String format(Instant instant, DateTimeFormatter formatter) {
        // Read the default zone once so that the value and the formatter use the same zone.
        ZoneId zone = ZoneId.systemDefault();
        return formatter.withZone(zone).format(instant.atZone(zone));
    }

    /**
     * Formats an instant in the system default time zone with
     * {@link #MILLISECOND_FORMATTER}.
     *
     * @param instant the point in time to format
     * @return the formatted date-time string
     */
    public static String format(Instant instant) {
        return format(instant, MILLISECOND_FORMATTER);
    }
}

0 comments on commit 9303388

Please sign in to comment.