Skip to content

Commit

Permalink
Working Project version 0.1
Browse files Browse the repository at this point in the history
  • Loading branch information
UdaySagar committed Apr 20, 2016
1 parent c5dbfea commit 474c81c
Show file tree
Hide file tree
Showing 46 changed files with 966 additions and 24 deletions.
6 changes: 6 additions & 0 deletions .classpath
Original file line number Diff line number Diff line change
Expand Up @@ -74,5 +74,11 @@
<classpathentry kind="lib" path="/home/udaysagar/Downloads/spring-framework-4.2.5.RELEASE/libs/spring-beans-4.2.5.RELEASE.jar"/>
<classpathentry kind="lib" path="/home/udaysagar/Downloads/spring-framework-4.2.5.RELEASE/libs/spring-core-4.2.5.RELEASE.jar"/>
<classpathentry kind="lib" path="/home/udaysagar/Downloads/commons-logging-1.2/commons-logging-1.2.jar"/>
<classpathentry kind="lib" path="/home/udaysagar/workspace_spring/wikiSexIdentifier/WikipediaSpecialExportModeler/stanford-postagger/stanford-postagger-3.6.0.jar"/>
<classpathentry kind="lib" path="/home/udaysagar/workspace_spring/wikiSexIdentifier/WikipediaSpecialExportModeler/stanford-postagger/slf4j-simple.jar"/>
<classpathentry kind="lib" path="/home/udaysagar/workspace_spring/wikiSexIdentifier/WikipediaSpecialExportModeler/stanford-postagger/slf4j-api.jar"/>
<classpathentry kind="lib" path="/home/udaysagar/Downloads/stanford-postagger-full-2015-12-09/stanford-postagger-3.6.0-sources.jar"/>
<classpathentry kind="lib" path="/home/udaysagar/Downloads/stanford-postagger-full-2015-12-09/stanford-postagger.jar"/>
<classpathentry kind="lib" path="/home/udaysagar/Downloads/stanford-postagger-full-2015-12-09/stanford-postagger-3.6.0.jar"/>
<classpathentry kind="output" path="build/classes"/>
</classpath>
Binary file added WebContent/WEB-INF/lib/slf4j-api.jar
Binary file not shown.
Binary file added WebContent/WEB-INF/lib/slf4j-simple.jar
Binary file not shown.
Binary file not shown.
Binary file added WebContent/WEB-INF/models/0/men.mdl
Binary file not shown.
Binary file added WebContent/WEB-INF/models/0/object.mdl
Binary file not shown.
Binary file added WebContent/WEB-INF/models/0/women.mdl
Binary file not shown.
Binary file added WebContent/WEB-INF/models/1/men.mdl
Binary file not shown.
Binary file added WebContent/WEB-INF/models/1/object.mdl
Binary file not shown.
Binary file added WebContent/WEB-INF/models/1/women.mdl
Binary file not shown.
Binary file added WebContent/WEB-INF/models/2/men.mdl
Binary file not shown.
Binary file added WebContent/WEB-INF/models/2/object.mdl
Binary file not shown.
Binary file added WebContent/WEB-INF/models/2/women.mdl
Binary file not shown.
53 changes: 43 additions & 10 deletions WebContent/WEB-INF/pages/index.jsp
Original file line number Diff line number Diff line change
Expand Up @@ -4,32 +4,65 @@
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.2.0/jquery.min.js"></script>
<!-- Latest compiled and minified CSS -->
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css" integrity="sha384-1q8mTJOASx8j1Au+a5WDVnPi2lkFfwwEAa8hDDdjZlpLegxhjVME1fgjWPGmkzs7" crossorigin="anonymous">

<!-- Optional theme -->
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap-theme.min.css" integrity="sha384-fLW2N01lMqjakBkx3l/M9EahuwpSfeNvV63J5ezn3uZzapT0u7EYsXMjQV+0En5r" crossorigin="anonymous">

<!-- Latest compiled and minified JavaScript -->
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/js/bootstrap.min.js" integrity="sha384-0mSbJDEHialfmuBBQP6A4Qrprq5OVfW37PRR3j5ELqxss1yVqOtnepnHVP9aJ7xS" crossorigin="anonymous"></script>
<style>
h1 {
text-align: center;
}
input.normal, select {
float: right;
margin-right: 16%;
width: 315px;
}
input.submit {
margin-left:135px
}
</style>
<title>Wiki Page Sex Identification</title>
</head>
<body>
<h1>Wiki Page Sex Identification</h1>
</br></br>
<p> Instructors: Dr. Amit Sheth, Dr. Tanvi Banerjee</p>
<p> Mentor: Sumant Kulkarni</p>
<p> Authors: William Hatfield, Utkarshani Jaimini, Uday Sagar Panjala</p>
</br></br>
</br>
<p style="text-align:center;"> <strong>Instructors:</strong> Dr. Amit Sheth, Dr. Tanvi Banerjee <strong style="margin-left:30px;">Mentor:</strong> Sumant Kulkarni <strong style="margin-left:30px;">Authors:</strong> William Hatfield, Utkarshani Jaimini, Uday Sagar Panjala</p>
</br></br></br></br></br>
<div align="center">
<form action="/wikiSexIdentifier/result" method="post">
<p>
Wikipedia Page Url: <input name="wiki_url" type="text" />
Wikipedia Page Url: <input class="normal" name="wiki_url" type="text" />
</p>
</br>
<p>
Page Sections to be POS Tagged: <select name="page_section_model">
<option value="first_section">First Section</option>
<option value="two_sections">First Two Sections</option>
<option value="whole_page">Whole Wiki Page</option>
<option value="1">First Section</option>
<option value="2">First Two Sections</option>
<option value="0">Whole Wiki Page</option>
</select>
</p>
</br>
<p>
Models to be compared against: <select name="models_to_compare">
<option value="1">First Section</option>
<option value="2">First Two Sections</option>
<option value="0">Whole Wiki Page</option>
</select>
</p>
</br>
<p>
Classifier to be used: <select name="classifier_name">
<option value="naive_bayes">Naive Bayes Classifier</option>
</select>
</p>
<input type="submit" value="Let me know the result!" />
</br></br>
<input class="submit" type="submit" value="Let me know the result!" />
</form>
</div>
</body>
</html>
35 changes: 27 additions & 8 deletions WebContent/WEB-INF/pages/result.jsp
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,42 @@
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.2.0/jquery.min.js"></script>
<!-- Latest compiled and minified CSS -->
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css" integrity="sha384-1q8mTJOASx8j1Au+a5WDVnPi2lkFfwwEAa8hDDdjZlpLegxhjVME1fgjWPGmkzs7" crossorigin="anonymous">

<!-- Optional theme -->
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap-theme.min.css" integrity="sha384-fLW2N01lMqjakBkx3l/M9EahuwpSfeNvV63J5ezn3uZzapT0u7EYsXMjQV+0En5r" crossorigin="anonymous">

<!-- Latest compiled and minified JavaScript -->
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/js/bootstrap.min.js" integrity="sha384-0mSbJDEHialfmuBBQP6A4Qrprq5OVfW37PRR3j5ELqxss1yVqOtnepnHVP9aJ7xS" crossorigin="anonymous"></script>
<style>
h1 {
text-align: center;
}
</style>
<title>Wiki Page Sex Identification</title>
</head>
<body>
<h1>Wiki Page Sex Identification Result Page</h1>
</br></br>
<p> Instructors: Dr. Amit Sheth, Dr. Tanvi Banerjee</p>
<p> Mentor: Sumant Kulkarni</p>
<p> Authors: William Hatfield, Utkarshani Jaimini, Uday Sagar Panjala</p>
<h1>Wiki Page Sex Identification Results Page</h1>
</br>
<p style="text-align:center;"> <strong>Instructors:</strong> Dr. Amit Sheth, Dr. Tanvi Banerjee <strong style="margin-left:30px;">Mentor:</strong> Sumant Kulkarni <strong style="margin-left:30px;">Authors:</strong> William Hatfield, Utkarshani Jaimini, Uday Sagar Panjala</p>
</br></br></br></br></br>
<div align="center">

<p>The subject of the given Wiki Page, ${wiki_url} is : <strong style="font-size:2em;color:green;">${winner}</strong></p>
</br></br>
<p>
${wiki_url}
${labelOf_first} : <strong style="font-size:2em;">${Score_First}</strong>
</p>
<p>
${page_section_model}
${labelOf_second} : <strong style="font-size:2em;">${Score_Second}</strong>
</p>
<p>
${classifier_name}
${labelOf_third} : <strong style="font-size:2em;">${Score_Third}</strong>
</p>
</br></br></br>
<a href="http://130.108.85.184:8080/wikiSexIdentifier/">Go back to Home Page</a>
</div>
</body>
</html>
Binary file not shown.
1 change: 1 addition & 0 deletions WikipediaSpecialExportModeler
Submodule WikipediaSpecialExportModeler added at 7cfbd4
Binary file added WikipediaSpecialExportModeler (2).zip
Binary file not shown.
File renamed without changes.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added documents.zip
Binary file not shown.
Binary file added english-left3words-distsim.tagger
Binary file not shown.
Binary file added models/0/men.mdl
Binary file not shown.
Binary file added models/0/object.mdl
Binary file not shown.
Binary file added models/0/women.mdl
Binary file not shown.
Binary file added models/1/men.mdl
Binary file not shown.
Binary file added models/1/object.mdl
Binary file not shown.
Binary file added models/1/women.mdl
Binary file not shown.
Binary file added models/2/men.mdl
Binary file not shown.
Binary file added models/2/object.mdl
Binary file not shown.
Binary file added models/2/women.mdl
Binary file not shown.
177 changes: 171 additions & 6 deletions src/com/wikisexidentifier/SexidentifierController.java
Original file line number Diff line number Diff line change
@@ -1,14 +1,21 @@
package com.wikisexidentifier;

import projectutilities.*;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.servlet.ModelAndView;

import java.io.*;
import java.net.*;
import java.util.ArrayList;
import java.util.Scanner;

import org.springframework.core.io.FileSystemResource;
import org.springframework.core.io.Resource;

@Controller
public class SexidentifierController {

Expand All @@ -20,13 +27,171 @@ public ModelAndView homePage() {
}

@RequestMapping(value="/result", method=RequestMethod.POST)
public ModelAndView result(@RequestParam("wiki_url") String wiki_url, @RequestParam("page_section_model") String page_section_model, @RequestParam("classifier_name") String classifier_name) throws Exception {
public ModelAndView result(@RequestParam("wiki_url") String wiki_url, @RequestParam("page_section_model") String page_section_model, @RequestParam("classifier_name") String classifier_name, @RequestParam("models_to_compare") String models_to_compare) throws Exception {

String raw_media_wiki_content = new Scanner(new URL(wiki_url+"?action=raw").openStream(), "UTF-8").useDelimiter("\\A").next();

WikipediaSpecialExportProcessor wsep = new WikipediaSpecialExportProcessor();
ArrayList<String> pageParagraphs = wsep.getParagraphsFromMediaWikiString(raw_media_wiki_content);
StanfordSpeechTaggerAndCounter sstc = new StanfordSpeechTaggerAndCounter();

System.out.println(page_section_model);
int paragraphs = Integer.parseInt(page_section_model);
if (paragraphs == 0) paragraphs = pageParagraphs.size();

ArrayList<String> tagged_paragraphs = new ArrayList<String>(pageParagraphs.size());

for (String s : pageParagraphs) {
tagged_paragraphs.add(sstc.tagNormalizedString(s));
}

TermCountProbabilityModel tcpm = new TermCountProbabilityModel();
ArrayList<String> pronouns = new ArrayList<String>();

for (int i = 0; i < paragraphs; i++) {
String para = tagged_paragraphs.get(i);
String[] words = para.split(" ");

for (String word : words) {
//System.out.println(word);
if (word.endsWith("_PRP")) {
String stripped = word.replace("_PRP", "");
String lo = stripped.toLowerCase();
tcpm.pushTerm(lo);
pronouns.add(lo);
//System.out.println(word);
} else if (word.endsWith("_PRP$")) {
String stripped = word.replace("_PRP$", "");
String lo = stripped.toLowerCase();
tcpm.pushTerm(lo);
pronouns.add(lo);
}
}
}
System.out.println(">> All Pronouns: " + pronouns);
//System.out.println(tcpm.toString());
if (tcpm.getModelSize() == 0) {
System.out.println("Required features used for the classifer not found");
}

String obj_filename = "", wom_filename = "", men_filename = "";

int model_paragraphs = Integer.parseInt(models_to_compare);


// switch (model_paragraphs) {
// default: {
// obj_filename = "/home/udaysagar/workspace_spring/wikiSexIdentifier/models/0/object.mdl";
// wom_filename = "/home/udaysagar/workspace_spring/wikiSexIdentifier/models/0/women.mdl";
// men_filename = "/home/udaysagar/workspace_spring/wikiSexIdentifier/models/0/men.mdl";
// break;
// }
// case 1: {
// obj_filename = "/home/udaysagar/workspace_spring/wikiSexIdentifier/models/1/object.mdl";
// wom_filename = "/home/udaysagar/workspace_spring/wikiSexIdentifier/models/1/women.mdl";
// men_filename = "/home/udaysagar/workspace_spring/wikiSexIdentifier/models/1/men.mdl";
// break;
// }
// case 2: {
// obj_filename = "/home/udaysagar/workspace_spring/wikiSexIdentifier/models/2/object.mdl";
// wom_filename = "/home/udaysagar/workspace_spring/wikiSexIdentifier/models/2/women.mdl";
// men_filename = "/home/udaysagar/workspace_spring/wikiSexIdentifier/models/2/men.mdl";
// break;
// }
// }

switch (model_paragraphs) {
default: {
obj_filename = "/opt/tomcat/wikiSexIdentifier/models/0/object.mdl";
wom_filename = "/opt/tomcat/wikiSexIdentifier/models/0/women.mdl";
men_filename = "/opt/tomcat/wikiSexIdentifier/models/0/men.mdl";
break;
}
case 1: {
obj_filename = "/opt/tomcat/wikiSexIdentifier/models/1/object.mdl";
wom_filename = "/opt/tomcat/wikiSexIdentifier/models/1/women.mdl";
men_filename = "/opt/tomcat/wikiSexIdentifier/models/1/men.mdl";
break;
}
case 2: {
obj_filename = "/opt/tomcat/wikiSexIdentifier/models/2/object.mdl";
wom_filename = "/opt/tomcat/wikiSexIdentifier/models/2/women.mdl";
men_filename = "/opt/tomcat/wikiSexIdentifier/models/2/men.mdl";
break;
}
}
//System.out.println(obj_filename);
TermCountProbabilityModel objects_model = new TermCountProbabilityModel(obj_filename);
TermCountProbabilityModel women_model = new TermCountProbabilityModel(wom_filename);
TermCountProbabilityModel men_model = new TermCountProbabilityModel(men_filename);

// System.out.println("Objects");
// System.out.println(objects_model.toString());
// System.out.println("Women:");
// System.out.println(women_model.toString());
// System.out.println("Men");
// System.out.println(men_model.toString());

int vocab = 0;
vocab += objects_model.getVocabulary().size();
vocab += women_model.getVocabulary().size();
vocab += men_model.getVocabulary().size();

tcpm.computeTheTermProbabilites(vocab);

double obj_prob = objects_model.getClassProbability(tcpm, vocab);
double wom_prob = women_model.getClassProbability(tcpm, vocab);
double men_prob = men_model.getClassProbability(tcpm, vocab);

String winner = "";
Double score;
if (obj_prob > wom_prob) {
score = obj_prob;
winner = "Object";
} else {
score = wom_prob;
winner = "Female";
}

if (men_prob > score) {
score = men_prob;
winner = "Male";
}

//ArrayList<Double> arrayListOfProbs = new ArrayList<Double>();

//Collections.sort(arrayListOfProbs);

//FileSystemResource male_model = new FileSystemResource("/WEB-INF/models/0/");

// String[] result = {"Object Class Score:80%", "Female Class Score:60%", "Male Class Score:40%"};
// String[] first_result = result[0].split(":");
// String labelOf_first = first_result[0];
//String probabilityOf_first = first_result[1];
String probabilityOf_first = "" + obj_prob;

// String[] second_result = result[1].split(":");
// String labelOf_second = second_result[0];
//String probabilityOf_second = second_result[1];
String probabilityOf_second = "" + wom_prob;

// String[] third_result = result[2].split(":");
// String labelOf_third = third_result[0];
//String probabilityOf_third = third_result[1];
String probabilityOf_third = "" + men_prob;

String raw_media_wiki_content = new Scanner(new URL(wiki_url+"?action=raw").openStream(), "UTF-8").useDelimiter("\\A").next();
ModelAndView modelandview = new ModelAndView("result");
modelandview.addObject("wiki_url", raw_media_wiki_content);
modelandview.addObject("page_section_model", page_section_model);
modelandview.addObject("classifier_name", classifier_name);
modelandview.addObject("labelOf_first", "Object Class Score");
modelandview.addObject("Score_First", probabilityOf_first);

modelandview.addObject("labelOf_second", "Female Class Score");
modelandview.addObject("Score_Second", probabilityOf_second);

modelandview.addObject("labelOf_third", "Male Class Score");
modelandview.addObject("Score_Third", probabilityOf_third);

modelandview.addObject("wiki_url", wiki_url);
modelandview.addObject("winner", winner);

return modelandview;
}
Expand Down
Binary file not shown.
Loading

0 comments on commit 474c81c

Please sign in to comment.