Skip to content

Commit

Permalink
Imps: 1. move exotic-ml to exotic-spark
Browse files Browse the repository at this point in the history
  • Loading branch information
platonai committed Nov 17, 2023
1 parent a388aa7 commit 337d27f
Show file tree
Hide file tree
Showing 7 changed files with 51 additions and 78 deletions.
58 changes: 25 additions & 33 deletions exotic-app/exotic-ML-examples/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,22 @@
<artifactId>exotic-ML-examples</artifactId>
<name>Exotic ML Examples</name>

<build>
<defaultGoal>package</defaultGoal>

<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<mainClass>ai.platon.exotic.examples.ml.unsupervised.topEc.english.amazon.AmazonScanHarvestKt</mainClass>
<addResources>true</addResources>
<executable>true</executable>
</configuration>
</plugin>
</plugins>
</build>

<dependencies>
<!-- Internal dependency -->
<dependency>
Expand Down Expand Up @@ -89,6 +105,7 @@
</dependency>

<!-- Required by ai.platon.pulsar.browser.driver.chrome.impl.Chrome -->
<!-- TODO: remove jackson-databind -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
Expand All @@ -104,43 +121,18 @@
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.jpmml</groupId>
<artifactId>pmml-model-jackson</artifactId>
</dependency>
<dependency>
<groupId>org.jpmml</groupId>
<artifactId>pmml-evaluator-metro</artifactId>
</dependency>
<!-- <dependency>-->
<!-- <groupId>org.jpmml</groupId>-->
<!-- <artifactId>pmml-model-jackson</artifactId>-->
<!-- </dependency>-->
<!-- <dependency>-->
<!-- <groupId>org.jpmml</groupId>-->
<!-- <artifactId>pmml-evaluator-metro</artifactId>-->
<!-- </dependency>-->

<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.16</version>
</dependency>
</dependencies>

<build>
<defaultGoal>package</defaultGoal>

<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<mainClass>ai.platon.exotic.examples.ml.unsupervised.topEc.english.amazon.AmazonScanHarvestKt</mainClass>
<addResources>true</addResources>
<executable>true</executable>
</configuration>
</plugin>
</plugins>
</build>
</project>
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,8 @@ class AmazonScanHarvester {
val document = session.loadDocument(url)
val encodeOptions = EncodeOptions(labels = labels)
val df = session.encodeNodes(document, encodeOptions) { it.isText && it.nthScreen <= 2 }
val requestBody = df.points.map { it.dataRef.joinToString(" ") }.joinToString("\n")

val requestBody = df.points.joinToString("\n") { it.dataRef.joinToString(" ") }
val request = HttpRequest.newBuilder()
.uri(URI.create("http://localhost:8185/"))
.method("POST", HttpRequest.BodyPublishers.ofString(requestBody))
Expand Down
29 changes: 4 additions & 25 deletions exotic-ml/pom.xml → exotic-spark/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@
<modelVersion>4.0.0</modelVersion>

<groupId>ai.platon.exotic</groupId>
<artifactId>exotic-ml</artifactId>
<artifactId>exotic-spark</artifactId>
<version>1.12.1-SNAPSHOT</version>
<name>Exotic ML</name>
<name>Exotic Spark</name>

<build>
<defaultGoal>package</defaultGoal>
<plugins>
<!-- java -->
<plugin>
Expand Down Expand Up @@ -50,24 +51,6 @@
<target>${javac.target.version}</target>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.4.1</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<shadedArtifactAttached>true</shadedArtifactAttached>
<createDependencyReducedPom>true</createDependencyReducedPom>
<minimizeJar>true</minimizeJar>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>

Expand All @@ -77,11 +60,6 @@
<artifactId>spark-mllib_2.13</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>3.3.4</version>
</dependency>

<!-- Use BareLocalFileSystem instead of HadoopFileSystem, see https://stackoverflow.com/questions/73503205 -->
<dependency>
Expand All @@ -95,6 +73,7 @@
<javac.src.version>11</javac.src.version>
<javac.target.version>11</javac.target.version>

<scala.binary.version>2.13</scala.binary.version>
<spark.version>3.5.0</spark.version>
</properties>
</project>
Original file line number Diff line number Diff line change
Expand Up @@ -19,24 +19,23 @@

import com.globalmentor.apache.hadoop.fs.BareLocalFileSystem;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.SystemUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.mllib.tree.RandomForest;
import org.apache.spark.mllib.tree.model.RandomForestModel;
import org.apache.spark.mllib.util.MLUtils;
import scala.Tuple2;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;

public class RandomForestClassifier implements AutoCloseable {
public class RandomForest implements AutoCloseable {

private int numClasses;
private Path datasetPath;
Expand All @@ -47,13 +46,7 @@ public class RandomForestClassifier implements AutoCloseable {
private JavaSparkContext javaSparkContext;
private SparkContext sparkContext;

/**
 * Creates a classifier that stores its model at a default location.
 *
 * @param numClasses  stored in the {@code numClasses} field, which {@code train()} passes to
 *                    {@code trainClassifier}
 * @param datasetPath stored in the {@code datasetPath} field
 */
public RandomForestClassifier(int numClasses, Path datasetPath) {
this.numClasses = numClasses;
this.datasetPath = datasetPath;
// Default model location: "pulsar/ml/RandomForestClassifier" resolved against the directory
// returned by SystemUtils.getJavaIoTmpDir().
modelPath = SystemUtils.getJavaIoTmpDir().toPath().resolve("pulsar/ml/RandomForestClassifier");
}

public RandomForestClassifier(int numClasses, Path datasetPath, Path modelPath) {
public RandomForest(int numClasses, Path datasetPath, Path modelPath) {
this.numClasses = numClasses;
this.datasetPath = datasetPath;
this.modelPath = modelPath;
Expand Down Expand Up @@ -107,7 +100,7 @@ public void train() throws IOException {
var maxBins = 32;
var seed = new Random().nextInt();

var model = RandomForest.trainClassifier(trainingData, numClasses,
var model = org.apache.spark.mllib.tree.RandomForest.trainClassifier(trainingData, numClasses,
categoricalFeaturesInfo, numTrees, featureSubsetStrategy, impurity, maxDepth, maxBins,
seed);

Expand Down Expand Up @@ -159,9 +152,10 @@ public void close() throws Exception {
public static void main(String[] args) throws IOException {
var numClasses = 7;
var datasetPath = Paths.get("data/dom/amazon.dataset.6.labels.txt");
var modelPath = SystemUtils.getJavaIoTmpDir().toPath().resolve("pulsar/ml/RandomForestClassifier");
var modelPath = Paths.get(System.getProperty("user.home") + "/.pulsar/ml/model/spark/RandomForest");
Files.createDirectories(modelPath);

try (var classifier = new RandomForestClassifier(numClasses, datasetPath, modelPath)) {
try (var classifier = new ai.platon.exotic.ml.RandomForest(numClasses, datasetPath, modelPath)) {
classifier.train();
classifier.predict();
} catch (Exception e) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package ai.platon.exotic.ml.server;

import ai.platon.exotic.ml.RandomForestClassifier;
import ai.platon.exotic.ml.RandomForest;
import com.sun.net.httpserver.HttpExchange;
import com.sun.net.httpserver.HttpHandler;
import org.apache.commons.lang3.math.NumberUtils;
Expand All @@ -13,9 +13,9 @@

public class MLHandler implements HttpHandler {

private RandomForestClassifier classifier;
private RandomForest classifier;

public void setClassifier(RandomForestClassifier classifier) {
public void setClassifier(RandomForest classifier) {
this.classifier = classifier;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ public class MLServer {
/**
 * Starts an HTTP server on {@code ML_SERVER_PORT} and registers {@link MLHandler} instances
 * for the prediction endpoints. Exchanges are dispatched on a fixed pool of three threads.
 *
 * @throws IOException if the underlying {@link HttpServer} cannot be created
 */
public void serve() throws IOException {
    Executor threadPoolExecutor = Executors.newFixedThreadPool(3);
    HttpServer server = HttpServer.create(new InetSocketAddress(ML_SERVER_PORT), 0);
    server.createContext("/ml/predict", new MLHandler());
    // Context paths must begin with '/': HttpServer.createContext throws
    // IllegalArgumentException for "api/ml/predict", which would abort startup.
    server.createContext("/api/ml/predict", new MLHandler());
    // NOTE(review): no classifier is set on these handlers here; confirm MLHandler is wired
    // with one elsewhere (see MLHandler.setClassifier) before it serves requests.
    server.setExecutor(threadPoolExecutor);
    server.start();
}
Expand Down
10 changes: 9 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,21 @@
<module>exotic-common</module>
<module>exotic-driver</module>
<module>exotic-crawl-common</module>
<module>exotic-ml</module>
<module>exotic-services</module>
<module>exotic-server</module>
<module>exotic-standalone</module>
<module>exotic-app</module>
</modules>

<profiles>
<profile>
<id>spark</id>
<modules>
<module>exotic-spark</module>
</modules>
</profile>
</profiles>

<build>
<defaultGoal>install</defaultGoal>

Expand Down

0 comments on commit 337d27f

Please sign in to comment.