This repository has been archived by the owner on Jan 30, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 94be44e
Showing
10 changed files
with
548 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
//Code taken (and then modified) from "Data Abstraction and Problem Solving" by Janet J. Prichard | ||
|
||
public class BST{ | ||
|
||
TreeNode root; | ||
int count; | ||
Term newTerm; | ||
String documentName; | ||
boolean newword = false; | ||
int c = 0; | ||
|
||
|
||
public BST(){ | ||
root = null; | ||
count = 0; | ||
} | ||
|
||
public Term add(String documentName, String word){ | ||
this.documentName = documentName; | ||
newTerm = new Term(word); | ||
root = insertItem(root, newTerm); | ||
if(newword == true){ | ||
newTerm.incFrequency(documentName); | ||
} | ||
return newTerm; | ||
|
||
} | ||
|
||
public Term get(String word, Boolean printDepth){ | ||
if(printDepth == true){ | ||
count = 0; | ||
Term term = retrieveItem(root, word); | ||
System.out.println(" At depth " + count); | ||
return term; | ||
} | ||
return retrieveItem(root, word); | ||
} | ||
|
||
protected TreeNode insertItem(TreeNode tNode, Term newItem){ | ||
TreeNode newSubtree; | ||
|
||
if(tNode == null){ | ||
tNode = new TreeNode(newItem, null, null); | ||
if(c == 0){ | ||
newItem.incFrequency(documentName); | ||
} | ||
return tNode; | ||
} | ||
c++; | ||
Term nodeItem = tNode.item; | ||
if (newItem.word.compareTo(nodeItem.word) == 0){ | ||
nodeItem.incFrequency(documentName); | ||
return tNode; | ||
} | ||
else if(newItem.word.compareTo(nodeItem.word) < 0){ | ||
newSubtree = insertItem(tNode.leftChild, newItem); | ||
tNode.leftChild = newSubtree; | ||
newword = true; | ||
return tNode; | ||
} | ||
else{ | ||
newSubtree = insertItem(tNode.rightChild, newItem); | ||
tNode.rightChild = newSubtree; | ||
newword = true; | ||
return tNode; | ||
} | ||
|
||
} | ||
|
||
protected Term retrieveItem(TreeNode tNode, String word){ | ||
|
||
Term treeItem; | ||
if (tNode == null){ | ||
treeItem = null; | ||
} | ||
else{ | ||
Term nodeItem = tNode.item; | ||
if(word.compareTo(nodeItem.word) == 0){ | ||
treeItem = tNode.item; | ||
} | ||
else if(word.compareTo(nodeItem.word) < 0){ | ||
treeItem = retrieveItem(tNode.leftChild, word); | ||
} | ||
else{ | ||
treeItem = retrieveItem(tNode.rightChild, word); | ||
} | ||
} | ||
count++; | ||
return treeItem; | ||
} | ||
|
||
|
||
|
||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
// LinkedList.java | ||
// Author: NameHere | ||
// Date: DateHere | ||
// Class: cs200 | ||
// P0 | ||
public class LinkedList { | ||
|
||
public Node head; | ||
private int size; | ||
|
||
public LinkedList() | ||
{ | ||
head=null; | ||
size=0; | ||
} | ||
|
||
public void add(String s) | ||
{ | ||
//increment size of LinkedList | ||
size+=1; | ||
Node n = new Node(s); | ||
//case: LinkedList is Empty | ||
if(head == null) | ||
{ | ||
head = n; | ||
|
||
} | ||
//otherwise add to tail of LinkedList | ||
else | ||
{ | ||
Node temp = head; | ||
while(temp.getNext()!=null) | ||
{ | ||
//this iterates to the next Node in the LinkedList | ||
temp=temp.getNext(); | ||
} | ||
//This sets the pointer of temp to the Node that we want to add. | ||
temp.setNext(n); | ||
} | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
// Node.java | ||
// Author: NameHere | ||
// Date: Date Here | ||
// Class: cs200 | ||
// P0 | ||
public class Node { | ||
|
||
private String data; | ||
private Node next; | ||
Occurrence doc; | ||
|
||
public Node(String s) | ||
{ | ||
doc = new Occurrence(s); | ||
this.data=s; | ||
this.next=null; | ||
} | ||
public String getDocName(){ | ||
return doc.docName; | ||
} | ||
public int getTermFrequency(){ | ||
return doc.termFrequency; | ||
} | ||
public String getData() { | ||
return data; | ||
} | ||
public Node getNext() { | ||
return next; | ||
} | ||
public void setNext(Node next) { | ||
this.next = next; | ||
} | ||
public void incFrequency(){ | ||
doc.incFrequency(); | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
|
||
/**
 * Counts how often something occurred in one named document.
 * A new instance starts with a frequency of 1.
 */
public class Occurrence {

    /** Number of occurrences counted so far; starts at 1. */
    public int termFrequency = 1;

    /** Name given at construction time. */
    public String docName;

    /**
     * Creates an occurrence record with a frequency of 1.
     *
     * @param name the document name to store
     */
    public Occurrence(String name) {
        docName = name;
    }

    /** Adds one to {@link #termFrequency}. */
    public void incFrequency() {
        termFrequency += 1;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
import java.io.File; | ||
import java.io.FileNotFoundException; | ||
import java.util.Scanner; | ||
|
||
|
||
public class PA3 { | ||
|
||
public static void main(String[] args) { | ||
|
||
WebPages web = new WebPages(); | ||
|
||
try { | ||
File file = new File(args[0]); | ||
Scanner scan; | ||
scan = new Scanner(file); | ||
String nextArg = scan.nextLine(); | ||
|
||
//adding files/creating new webPages | ||
while(!(nextArg.equals("*EOFs*"))){ | ||
web.addPage(nextArg); | ||
nextArg = scan.nextLine(); | ||
} | ||
|
||
System.out.println("WORDS"); | ||
web.printTree(); | ||
|
||
//skip EOFs | ||
nextArg = scan.nextLine(); | ||
|
||
System.out.println(); | ||
|
||
|
||
//scanning though all whichPages words | ||
while(!(nextArg == null)){ | ||
String[] pages = web.whichPages(nextArg.toLowerCase()); | ||
if(pages == null){ | ||
System.out.println(nextArg + " not found"); | ||
} | ||
else{ | ||
System.out.print(nextArg + " in pages: "); | ||
for(int i = 0; i < pages.length-1; i++){ | ||
System.out.print(pages[i] + ": "); | ||
if(i < pages.length-2){ | ||
i++; | ||
System.out.print(pages[i] + ", "); | ||
} | ||
} | ||
if(pages.length > 0){ | ||
System.out.println(pages[pages.length-1]); | ||
} | ||
else{ | ||
System.out.println(); | ||
} | ||
|
||
} | ||
if(scan.hasNext()){ | ||
nextArg = scan.nextLine(); | ||
} | ||
else{ | ||
break; | ||
} | ||
} | ||
|
||
scan.close(); | ||
} catch (FileNotFoundException e) { | ||
// TODO Auto-generated catch block | ||
e.printStackTrace(); | ||
} | ||
|
||
|
||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
import java.text.DecimalFormat; | ||
|
||
|
||
public class Term extends WebPages{ | ||
|
||
public String word; | ||
public int docFrequency; | ||
public int totalFrequency; | ||
LinkedList list = new LinkedList(); | ||
public Node first; | ||
boolean inList; | ||
|
||
//Construction of a term | ||
public Term(String name){ | ||
this.word = name; | ||
docFrequency = 0; | ||
} | ||
|
||
//increment frequency for terms and add documents to linked list | ||
public void incFrequency(String document){ | ||
totalFrequency++; | ||
|
||
//if empty, insert occurrence | ||
if(first == null){ | ||
first = new Node(document); | ||
inList = true; | ||
list.add(document); | ||
docFrequency++; | ||
} | ||
|
||
//loop through list | ||
while(first != null){ | ||
//if it is in list, inc frequency and set inList to true | ||
if(first.getData().equals(document)){ | ||
//increment total frequency | ||
first.incFrequency(); | ||
inList = true; | ||
break; | ||
} | ||
// else it is not in list | ||
else{ | ||
inList = false; | ||
} | ||
|
||
first = first.getNext(); | ||
} | ||
|
||
//if not in list, add to end | ||
if(inList == false){ | ||
list.add(document); | ||
docFrequency++; | ||
} | ||
|
||
//resets first to front of linked list | ||
first = list.head; | ||
|
||
} | ||
|
||
|
||
// fills array of Document names and TFIDF for a given Term | ||
public String[] fillArray(){ | ||
|
||
//create array to return | ||
String[] list = new String[0]; | ||
int index = 0; | ||
|
||
// loop through linked list of document names | ||
while(first != null){ | ||
//reallocating size of array | ||
String[] temp = new String[list.length]; | ||
temp = list; | ||
// increase array size by 2 (1 space for Document and 1 space for TFIDF) | ||
list = new String[index+2]; | ||
// copy data from temp array back to array w/ realocated size | ||
for(int i = 0; i < temp.length; i++){ | ||
list[i] = temp[i]; | ||
} | ||
|
||
//adding document name to array | ||
String document = first.getData(); | ||
DecimalFormat df = new DecimalFormat("###0.00"); | ||
|
||
//calculating TFIDF | ||
double tfidf = (float)first.getTermFrequency() * Math.log((float)(totalDoc)/(float)(docFrequency)); | ||
|
||
// adding document name to array | ||
list[index] = document; | ||
// casting TFIDF into String from double w/ DecimalFormatter | ||
String tFIDF = String.valueOf(df.format(tfidf)); | ||
index++; | ||
// adding TFIDF to array | ||
list[index] = tFIDF; | ||
index++; | ||
first = first.getNext(); | ||
|
||
} | ||
//return array of document names | ||
|
||
return list; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
//Code taken from "Data Abstraction and Problem Solving" by Janet J. Prichard pg. 581 | ||
|
||
/**
 * Unchecked exception type for tree-related errors; carries only a message.
 */
public class TreeException extends RuntimeException {

    /**
     * Creates the exception with the given detail message.
     *
     * @param s the detail message, available through {@link #getMessage()}
     */
    public TreeException(String s) {
        super(s);
    }
}
Oops, something went wrong.