-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
FEATURE: week1 algo part 2 - initial code commit, WordNet implemented…
…, optimization still required.
- Loading branch information
Showing
6 changed files
with
288 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<!-- | ||
~ Copyright (c) 2007-2012 Artigile. | ||
~ Software development company. | ||
~ All Rights Reserved. | ||
~ | ||
~ This software is the confidential and proprietary information of Artigile. ("Confidential Information"). | ||
~ You shall not disclose such Confidential Information and shall use it only in accordance with the terms of the | ||
~ license agreement you entered into with Artigile software company. | ||
--> | ||
|
||
<project xmlns="http://maven.apache.org/POM/4.0.0" | ||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | ||
|
||
<modelVersion>4.0.0</modelVersion> | ||
<parent> | ||
<groupId>com.artigile.coursera</groupId> | ||
<artifactId>mainpom</artifactId> | ||
<version>1.0</version> | ||
<relativePath>../../pom.xml</relativePath> | ||
</parent> | ||
<groupId>com.artigile.coursera</groupId> | ||
<artifactId>algo2</artifactId> | ||
<packaging>jar</packaging> | ||
<name>Algorithms</name> | ||
<version>1.0</version> | ||
|
||
<dependencies> | ||
|
||
<dependency> | ||
<groupId>com.google.guava</groupId> | ||
<artifactId>guava</artifactId> | ||
</dependency> | ||
|
||
|
||
<dependency> | ||
<groupId>junit</groupId> | ||
<artifactId>junit</artifactId> | ||
<scope>test</scope> | ||
</dependency> | ||
|
||
</dependencies> | ||
|
||
|
||
<build> | ||
<plugins> | ||
|
||
</plugins> | ||
</build> | ||
|
||
</project> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
/**
 * Placeholder for the outcast part of the WordNet exercise.
 * <p>
 * The class currently declares no state and no operations; it only reserves
 * the type name so that other sources can already refer to it.
 *
 * @author ioanbsu (created 11/4/13)
 */
public class Outcast {

    // Intentionally empty: nothing has been implemented yet.

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
/** | ||
* User: ioanbsu | ||
* Date: 11/4/13 | ||
* Time: 2:02 PM | ||
*/ | ||
public class SAP { | ||
|
||
// constructor takes a digraph (not necessarily a DAG) | ||
public SAP(Digraph G) { | ||
|
||
} | ||
|
||
// for unit testing of this class (such as the one below) | ||
public static void main(String[] args) { | ||
|
||
} | ||
|
||
// length of shortest ancestral path between v and w; -1 if no such path | ||
public int length(int v, int w) { | ||
return -1; | ||
} | ||
|
||
// a common ancestor of v and w that participates in a shortest ancestral path; -1 if no such path | ||
public int ancestor(int v, int w) { | ||
return -1; | ||
} | ||
|
||
// length of shortest ancestral path between any vertex in v and any vertex in w; -1 if no such path | ||
public int length(Iterable<Integer> v, Iterable<Integer> w) { | ||
return -1; | ||
} | ||
|
||
// a common ancestor that participates in shortest ancestral path; -1 if no such path | ||
public int ancestor(Iterable<Integer> v, Iterable<Integer> w) { | ||
return -1; | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,177 @@ | ||
import com.google.common.base.Joiner; | ||
import com.google.common.io.Files; | ||
|
||
import java.io.File; | ||
import java.io.IOException; | ||
import java.nio.charset.Charset; | ||
import java.util.*; | ||
|
||
/**
 * In-memory WordNet built from a synsets file and a hypernyms file.
 * <p>
 * Each line of the synsets file is {@code id,noun1 noun2 ...,gloss}; each line
 * of the hypernyms file is {@code id,hypernymId,hypernymId,...}. Queries follow
 * hypernym edges only (from a synset towards its hypernyms), so
 * {@link #distance} and {@link #sap} are based on ancestral paths: a path up
 * from one noun and a path up from the other that meet in a common ancestor.
 * <p>
 * The input is not checked for being a rooted DAG. Cycles are tolerated, and
 * two nouns without a common ancestor yield {@code -1} / {@code null}.
 *
 * @author ioanbsu (created 11/4/13)
 */
public class WordNet {

    /** Length reported when two nouns have no common ancestor. */
    private static final int NO_PATH = -1;

    // hypernym id -> ids of the synsets that name it as a direct hypernym
    Map<Integer, Set<Integer>> wordNetgraph = new HashMap<Integer, Set<Integer>>();
    // undirected view of the hypernym relation; populated but not read by this class
    Map<Integer, Set<Integer>> wordBidirectedNetgraph = new HashMap<Integer, Set<Integer>>();
    // synset id -> its nouns, in file order
    Map<Integer, Set<String>> synset = new HashMap<Integer, Set<String>>();
    // synset id -> gloss (third field of the synsets file, commas included)
    Map<Integer, String> def = new HashMap<Integer, String>();
    // noun -> id of the LAST synset that lists it; queries use synsetIdsByNoun instead
    Map<String, Integer> nouns = new HashMap<String, Integer>();
    // synset id -> ids of its direct hypernyms
    Map<Integer, Set<Integer>> ancestorsMap = new HashMap<Integer, Set<Integer>>();

    // noun -> every synset id that lists it (a noun may belong to several synsets)
    private final Map<String, Set<Integer>> synsetIdsByNoun = new HashMap<String, Set<Integer>>();
    // synset id -> second field of the synsets file, exactly as sap() reports it
    private final Map<Integer, String> synsetNames = new HashMap<Integer, String>();

    /**
     * Loads both input files. Construction has no side effects besides reading
     * them.
     *
     * @param synsets   path of the synsets file
     * @param hypernyms path of the hypernyms file
     * @throws IllegalArgumentException if a file cannot be read or a line is
     *                                  malformed (a non-numeric id surfaces as
     *                                  {@link NumberFormatException})
     */
    public WordNet(String synsets, String hypernyms) {
        buildHypernyms(hypernyms);
        buildSynsets(synsets);
    }

    /**
     * Ad-hoc driver: loads the two files named on the command line and prints
     * a few sample queries. A sample is skipped when the loaded data does not
     * contain its nouns.
     *
     * @param args {@code args[0]} synsets file, {@code args[1]} hypernyms file
     */
    public static void main(String[] args) {
        if (args.length < 2) {
            System.err.println("usage: WordNet <synsets file> <hypernyms file>");
            return;
        }
        WordNet wordNet = new WordNet(args[0], args[1]);

        String[][] distanceSamples = {
            {"1820s", "1750s"}, {"1750s", "1820s"}, {"1790s", "1850s"}, {"1850s", "1790s"}
        };
        for (String[] pair : distanceSamples) {
            if (wordNet.isNoun(pair[0]) && wordNet.isNoun(pair[1])) {
                System.out.println(wordNet.distance(pair[0], pair[1]));
            }
        }

        String[][] sapSamples = {
            {"1850s", "1790s"}, {"1850s", "18-karat_gold"}, {"1790s", "18-karat_gold"}, {"1820s", "1830s"}
        };
        for (String[] pair : sapSamples) {
            if (wordNet.isNoun(pair[0]) && wordNet.isNoun(pair[1])) {
                System.out.println(wordNet.sap(pair[0], pair[1]));
            }
        }
    }

    /**
     * The set of nouns (no duplicates).
     *
     * @return a read-only view of all known nouns
     */
    public Iterable<String> nouns() {
        return Collections.unmodifiableSet(nouns.keySet());
    }

    /**
     * Tells whether the word is a WordNet noun.
     *
     * @param word the word to look up
     * @return {@code true} if some synset lists the word
     */
    public boolean isNoun(String word) {
        return nouns.containsKey(word);
    }

    /**
     * Length of a shortest ancestral path between the two nouns, taken over
     * every synset each noun belongs to.
     *
     * @param nounA first noun
     * @param nounB second noun
     * @return the length (0 when both nouns share a synset), or -1 when the
     *         nouns have no common ancestor
     * @throws IllegalArgumentException if either argument is not a WordNet noun
     */
    public int distance(String nounA, String nounB) {
        return shortestAncestralPath(nounA, nounB)[0];
    }

    /**
     * The synset (second field of the synsets file) that is the common
     * ancestor of the two nouns on a shortest ancestral path.
     *
     * @param nounA first noun
     * @param nounB second noun
     * @return the synset text, or {@code null} when there is no common
     *         ancestor or the ancestor id has no entry in the synsets file
     * @throws IllegalArgumentException if either argument is not a WordNet noun
     */
    public String sap(String nounA, String nounB) {
        int[] path = shortestAncestralPath(nounA, nounB);
        return path[0] == NO_PATH ? null : synsetNames.get(path[1]);
    }

    /**
     * Finds the common ancestor minimising the summed hop counts from both
     * nouns. Ties are broken towards the smaller synset id so the answer does
     * not depend on map iteration order.
     *
     * @return {@code {length, ancestorId}}; length is -1 when nothing is shared
     */
    private int[] shortestAncestralPath(String nounA, String nounB) {
        Map<Integer, Integer> fromA = ancestorDistances(synsetIdsOf(nounA));
        Map<Integer, Integer> fromB = ancestorDistances(synsetIdsOf(nounB));

        int bestLength = NO_PATH;
        int bestAncestor = NO_PATH;
        for (Map.Entry<Integer, Integer> reached : fromA.entrySet()) {
            Integer hopsFromB = fromB.get(reached.getKey());
            if (hopsFromB == null) {
                continue; // not an ancestor of nounB
            }
            int candidate = reached.getKey();
            int length = reached.getValue() + hopsFromB;
            boolean shorter = bestLength == NO_PATH || length < bestLength;
            boolean tieWithSmallerId = length == bestLength && candidate < bestAncestor;
            if (shorter || tieWithSmallerId) {
                bestLength = length;
                bestAncestor = candidate;
            }
        }
        return new int[] {bestLength, bestAncestor};
    }

    /**
     * Breadth-first search along hypernym edges from all sources at once.
     *
     * @return every reachable synset id (sources included, at 0) mapped to its
     *         minimum hop count from any source
     */
    private Map<Integer, Integer> ancestorDistances(Set<Integer> sources) {
        Map<Integer, Integer> hops = new HashMap<Integer, Integer>();
        Deque<Integer> pending = new ArrayDeque<Integer>();
        for (Integer source : sources) {
            hops.put(source, 0);
            pending.add(source);
        }
        while (!pending.isEmpty()) {
            Integer current = pending.remove();
            Set<Integer> parents = ancestorsMap.get(current);
            if (parents == null) {
                continue; // synset never mentioned in the hypernyms file
            }
            int nextHop = hops.get(current) + 1;
            for (Integer parent : parents) {
                if (!hops.containsKey(parent)) {
                    hops.put(parent, nextHop);
                    pending.add(parent);
                }
            }
        }
        return hops;
    }

    /** Returns all synset ids of a noun, rejecting unknown (or null) nouns. */
    private Set<Integer> synsetIdsOf(String noun) {
        Set<Integer> ids = synsetIdsByNoun.get(noun);
        if (ids == null) {
            throw new IllegalArgumentException("not a WordNet noun: " + noun);
        }
        return ids;
    }

    /** Parses the synsets file into the noun, synset and gloss maps. */
    private void buildSynsets(String synsets) {
        for (String line : readLines(synsets)) {
            // Limit 3 keeps commas inside the gloss instead of cutting it short.
            String[] fields = line.split(",", 3);
            if (fields.length < 2) {
                throw new IllegalArgumentException("malformed synset line: " + line);
            }
            int id = parseId(fields[0]);
            String name = fields[1].trim();

            // Linked set keeps the nouns in file order.
            Set<String> members = new LinkedHashSet<String>();
            for (String noun : name.split(" ")) {
                if (noun.length() == 0) {
                    continue; // repeated blanks between nouns
                }
                members.add(noun);
                nouns.put(noun, id);
                Set<Integer> ids = synsetIdsByNoun.get(noun);
                if (ids == null) {
                    ids = new HashSet<Integer>();
                    synsetIdsByNoun.put(noun, ids);
                }
                ids.add(id);
            }
            synset.put(id, members);
            synsetNames.put(id, name);
            def.put(id, fields.length > 2 ? fields[2] : "");
        }
    }

    /** Parses the hypernyms file into the three graph maps. */
    private void buildHypernyms(String hypernyms) {
        for (String line : readLines(hypernyms)) {
            String[] fields = line.split(",");
            int child = parseId(fields[0]);
            registerVertex(child);
            for (int i = 1; i < fields.length; i++) {
                int parent = parseId(fields[i]);
                registerVertex(parent);
                wordNetgraph.get(parent).add(child);
                wordBidirectedNetgraph.get(parent).add(child);
                wordBidirectedNetgraph.get(child).add(parent);
                ancestorsMap.get(child).add(parent);
            }
        }
    }

    /**
     * Reads every non-blank line of a file using the platform default charset.
     *
     * @throws IllegalArgumentException if the file cannot be opened or read
     */
    private static List<String> readLines(String path) {
        Scanner scanner;
        try {
            scanner = new Scanner(new File(path), Charset.defaultCharset().name());
        } catch (IOException e) {
            throw new IllegalArgumentException("cannot open " + path, e);
        }
        try {
            List<String> lines = new ArrayList<String>();
            while (scanner.hasNextLine()) {
                String line = scanner.nextLine();
                if (line.trim().length() > 0) {
                    lines.add(line);
                }
            }
            // Scanner hides read errors; surface them instead of returning a partial file.
            IOException failure = scanner.ioException();
            if (failure != null) {
                throw new IllegalArgumentException("cannot read " + path, failure);
            }
            return lines;
        } finally {
            scanner.close();
        }
    }

    /** Parses a synset id, ignoring surrounding blanks. */
    private static int parseId(String value) {
        return Integer.parseInt(value.trim());
    }

    /** Makes sure the id has an (initially empty) entry in every graph map. */
    private void registerVertex(int id) {
        createMapIfNecessary(id, wordNetgraph);
        createMapIfNecessary(id, wordBidirectedNetgraph);
        createMapIfNecessary(id, ancestorsMap);
    }

    /** Adds an empty adjacency set for the id unless one already exists. */
    private void createMapIfNecessary(int id, Map<Integer, Set<Integer>> graph) {
        if (!graph.containsKey(id)) {
            graph.put(id, new HashSet<Integer>());
        }
    }

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
13 | ||
11 | ||
7 3 | ||
8 3 | ||
3 1 | ||
4 1 | ||
5 1 | ||
9 5 | ||
10 5 | ||
11 10 | ||
12 10 | ||
1 0 | ||
2 0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters