Skip to content

Commit

Permalink
Add Atom parsing for channel image and pub date
Browse files: browse the repository at this point in the history
  • Loading branch information
prof18 committed Jun 11, 2023
1 parent 92dec79 commit f2cc211
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ import com.prof.rssparser.ItunesArticleData
import com.prof.rssparser.ItunesChannelData
import com.prof.rssparser.core.CoreXMLParser.getImageUrl
import com.prof.rssparser.utils.AtomKeyword
import com.prof.rssparser.utils.RSSKeyword
import com.prof.rssparser.utils.attributeValue
import com.prof.rssparser.utils.contains
import com.prof.rssparser.utils.nextTrimmedText
Expand Down Expand Up @@ -64,6 +63,12 @@ internal fun extractAtomContent(xmlPullParser: XmlPullParser): Channel {
}
//endregion

//region Channel tags
xmlPullParser.contains(AtomKeyword.Icon) -> {
channelImageBuilder.url(xmlPullParser.nextTrimmedText())
}
//endregion

//region Item tags
xmlPullParser.contains(AtomKeyword.Entry.Author) -> {
if (insideItem) {
Expand Down Expand Up @@ -113,14 +118,15 @@ internal fun extractAtomContent(xmlPullParser: XmlPullParser): Channel {
}
}

xmlPullParser.contains(AtomKeyword.Entry.PubDate) -> {
xmlPullParser.contains(AtomKeyword.Updated) -> {
if (insideChannel) {
channelBuilder.lastBuildDate(xmlPullParser.nextTrimmedText())
}
}

xmlPullParser.contains(AtomKeyword.Entry.Published) -> {
if (insideItem) {
val nextTokenType = xmlPullParser.next()
if (nextTokenType == XmlPullParser.TEXT) {
articleBuilder.pubDate(xmlPullParser.text.trim())
}
// Skip to be able to find date inside 'tag' tag
continue@loop
articleBuilder.pubDate(xmlPullParser.nextTrimmedText())
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,19 @@ package com.prof.rssparser.utils
internal sealed class AtomKeyword(val value: String) {
object Atom : AtomKeyword("feed")
object Title : AtomKeyword("title")
object Icon: AtomKeyword("icon")
object Link : AtomKeyword("link") {
object Href : AtomKeyword("href")
object Rel : AtomKeyword("rel")
object Edit : AtomKeyword("edit")
}
object Subtitle : AtomKeyword("subtitle")

object Updated: AtomKeyword("updated")
object Entry {
object Item : AtomKeyword("entry")
object Guid : AtomKeyword("id")
object Content : AtomKeyword("content")
object PubDate : AtomKeyword("updated")
object Published : AtomKeyword("published")
object Category : AtomKeyword("category")
object Term : AtomKeyword("term")
object Description : AtomKeyword("summary")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package com.prof.rssparser.core

import com.prof.rssparser.Image
import com.prof.rssparser.testutils.BaseCoreXMLParserTest

/**
 * Atom feed test case backed by the `/feed-atom-test.xml` resource.
 *
 * This class contains no test logic of its own: it only supplies the fixture path and the
 * expected channel/article values to [BaseCoreXMLParserTest].
 * NOTE(review): presumably the base class parses the fixture and asserts each value —
 * confirm against BaseCoreXMLParserTest.
 *
 * NOTE(review): in the fixture the feed-level `<updated>`, the entry-level `<updated>` and the
 * entry-level `<published>` all carry the same timestamp, so [channelLastBuildDate] and
 * [articlePubDate] expectations cannot tell which element a value was read from.
 */
class CoreXMLParserAtomTest: BaseCoreXMLParserTest(
feedPath = "/feed-atom-test.xml",
channelTitle = "The Verge - All Posts",
channelLink = "https://www.theverge.com/",
// Only the URL is expected to be populated; it equals the fixture's feed-level <icon> text.
// The double dot in "32x32..png" is present in the fixture as well and is intentional here.
channelImage = Image(
title = null,
link = null,
description = null,
url = "https://cdn.vox-cdn.com/community_logos/52801/VER_Logomark_32x32..png"
),
// Equals the fixture's feed-level <updated> text.
channelLastBuildDate = "2023-05-26T17:30:31-04:00",
channelUpdatePeriod = null,
articleGuid = "https://www.theverge.com/2023/5/26/23739273/google-sonos-smart-speaker-patent-lawsuit-ruling",
// "\$" escapes the dollar sign so it is not treated as a string template.
articleTitle = "Sonos wins \$32.5 million patent infringement victory over Google",
articleAuthor = "Chris Welch",
articleLink = "https://www.theverge.com/2023/5/26/23739273/google-sonos-smart-speaker-patent-lawsuit-ruling",
// Equals the fixture entry's <published> text (and, in this fixture, its <updated> text too).
articlePubDate = "2023-05-26T17:30:31-04:00",
// Expected entry content as unescaped HTML. Raw strings do not support backslash escapes,
// so a literal dollar sign is written as ${'$'} inside the literal below.
articleContent = """
<figure>
<img alt="A photo of the Sonos Era 300 on a kitchen dining table." src="https://cdn.vox-cdn.com/thumbor/oCea2Vc5FYLWqQXGUmA4O-rRrM0=/0x0:2040x1360/1310x873/cdn.vox-cdn.com/uploads/chorus_image/image/72316887/DSCF0491.0.jpg" />
<figcaption>Photo by Chris Welch / The Verge</figcaption>
</figure>
<p id="b46vcm">Google has been ordered to pay Sonos ${'$'}32.5 million for infringing on the company’s smart speaker patent. A <a href="https://www.documentcloud.org/documents/23826599-google-sonos-trial-verdict?responsive=1&amp;title=1">jury verdict</a> issued in a San Francisco courtroom on Friday found that Google’s smart speakers and media players infringed on one of two Sonos patents at issue.</p>
<p id="keHPBL"><a href="https://www.theverge.com/2020/1/7/21055048/sonos-google-lawsuit-sues-speakers-assistant-amazon">The legal battle started in 2020</a> when Sonos accused Google of copying its patented multiroom audio technology after the companies partnered in 2013. <a href="https://www.theverge.com/2022/1/6/22871121/sonos-google-patent-itc-ruling-decision-import-ban">Sonos went on to win its case at the US International Trade Commission</a>, resulting in a limited import ban on some of the Google devices in question. Google has also <a href="https://www.theverge.com/2022/1/6/22871304/google-home-speaker-group-volume-control-changes-sonos-patent-decision">had to pull some features</a> from its lineup of smart speakers and smart displays.</p>
<figure class="e-image">
<cite>Image: United States District Court for the Northern District of...</cite></figure>
<p>
<a href="https://www.theverge.com/2023/5/26/23739273/google-sonos-smart-speaker-patent-lawsuit-ruling">Continue reading&hellip;</a>
</p>
""".trimIndent(),
// Same URL as the src attribute of the <img> in the expected content above.
articleImage = "https://cdn.vox-cdn.com/thumbor/oCea2Vc5FYLWqQXGUmA4O-rRrM0=/0x0:2040x1360/1310x873/cdn.vox-cdn.com/uploads/chorus_image/image/72316887/DSCF0491.0.jpg"
)
35 changes: 35 additions & 0 deletions rssparser/src/test/resources/feed-atom-test.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
<title>The Verge - All Posts</title>
<icon>https://cdn.vox-cdn.com/community_logos/52801/VER_Logomark_32x32..png</icon>
<updated>2023-05-26T17:30:31-04:00</updated>
<id>https://www.theverge.com/rss/full.xml</id>
<link type="text/html" href="https://www.theverge.com/" rel="alternate"/>
<entry>
<published>2023-05-26T17:30:31-04:00</published>
<updated>2023-05-26T17:30:31-04:00</updated>
<title>Sonos wins $32.5 million patent infringement victory over Google</title>
<content type="html">

&lt;figure&gt;
&lt;img alt="A photo of the Sonos Era 300 on a kitchen dining table." src="https://cdn.vox-cdn.com/thumbor/oCea2Vc5FYLWqQXGUmA4O-rRrM0=/0x0:2040x1360/1310x873/cdn.vox-cdn.com/uploads/chorus_image/image/72316887/DSCF0491.0.jpg" /&gt;
&lt;figcaption&gt;Photo by Chris Welch / The Verge&lt;/figcaption&gt;
&lt;/figure&gt;

&lt;p id="b46vcm"&gt;Google has been ordered to pay Sonos $32.5 million for infringing on the company’s smart speaker patent. A &lt;a href="https://www.documentcloud.org/documents/23826599-google-sonos-trial-verdict?responsive=1&amp;amp;title=1"&gt;jury verdict&lt;/a&gt; issued in a San Francisco courtroom on Friday found that Google’s smart speakers and media players infringed on one of two Sonos patents at issue.&lt;/p&gt;
&lt;p id="keHPBL"&gt;&lt;a href="https://www.theverge.com/2020/1/7/21055048/sonos-google-lawsuit-sues-speakers-assistant-amazon"&gt;The legal battle started in 2020&lt;/a&gt; when Sonos accused Google of copying its patented multiroom audio technology after the companies partnered in 2013. &lt;a href="https://www.theverge.com/2022/1/6/22871121/sonos-google-patent-itc-ruling-decision-import-ban"&gt;Sonos went on to win its case at the US International Trade Commission&lt;/a&gt;, resulting in a limited import ban on some of the Google devices in question. Google has also &lt;a href="https://www.theverge.com/2022/1/6/22871304/google-home-speaker-group-volume-control-changes-sonos-patent-decision"&gt;had to pull some features&lt;/a&gt; from its lineup of smart speakers and smart displays.&lt;/p&gt;
&lt;figure class="e-image"&gt;

&lt;cite&gt;Image: United States District Court for the Northern District of...&lt;/cite&gt;&lt;/figure&gt;
&lt;p&gt;
&lt;a href="https://www.theverge.com/2023/5/26/23739273/google-sonos-smart-speaker-patent-lawsuit-ruling"&gt;Continue reading&amp;hellip;&lt;/a&gt;
&lt;/p&gt;

</content>
<link rel="alternate" type="text/html" href="https://www.theverge.com/2023/5/26/23739273/google-sonos-smart-speaker-patent-lawsuit-ruling"/>
<id>https://www.theverge.com/2023/5/26/23739273/google-sonos-smart-speaker-patent-lawsuit-ruling</id>
<author>
<name>Chris Welch</name>
</author>
</entry>
</feed>

0 comments on commit f2cc211

Please sign in to comment.