Skip to content

Commit

Permalink
[FLINK-12473][ml] Add ML pipeline and MLlib interface
Browse files Browse the repository at this point in the history
This closes apache#8402
  • Loading branch information
pltbkd authored and shaoxuan-wang committed May 24, 2019
1 parent a22443c commit 3050957
Show file tree
Hide file tree
Showing 17 changed files with 1,474 additions and 0 deletions.
45 changes: 45 additions & 0 deletions flink-ml-parent/flink-ml-api/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<parent>
<groupId>org.apache.flink</groupId>
<artifactId>flink-ml-parent</artifactId>
<version>1.9-SNAPSHOT</version>
</parent>

<artifactId>flink-ml-api</artifactId>

<dependencies>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-shaded-jackson</artifactId>
<version>${jackson.version}-${flink.shaded.version}</version>
</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.flink.ml.api.core;

import org.apache.flink.annotation.PublicEvolving;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableEnvironment;

/**
* Estimators are {@link PipelineStage}s responsible for training and generating machine learning
* models.
*
* <p>The implementations are expected to take an input table as training samples and generate a
* {@link Model} which fits these samples.
*
* @param <E> class type of the Estimator implementation itself, used by {@link
* org.apache.flink.ml.api.misc.param.WithParams}.
* @param <M> class type of the {@link Model} this Estimator produces.
*/
@PublicEvolving
public interface Estimator<E extends Estimator<E, M>, M extends Model<M>> extends PipelineStage<E> {

/**
* Train and produce a {@link Model} which fits the records in the given {@link Table}.
*
* @param tEnv the table environment to which the input table is bound.
* @param input the table with records to train the Model.
* @return a model trained to fit on the given Table.
*/
M fit(TableEnvironment tEnv, Table input);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.flink.ml.api.core;

import org.apache.flink.annotation.PublicEvolving;
import org.apache.flink.table.api.Table;

/**
* A model is an ordinary {@link Transformer} except how it is created. While ordinary transformers
* are defined by specifying the parameters directly, a model is usually generated by an {@link
* Estimator} when {@link Estimator#fit(org.apache.flink.table.api.TableEnvironment, Table)} is
* invoked.
*
* <p>We separate Model from {@link Transformer} in order to support potential
* model specific logic such as linking a Model to the {@link Estimator} from which the model was
* generated.
*
* @param <M> The class type of the Model implementation itself, used by {@link
* org.apache.flink.ml.api.misc.param.WithParams}
*/
@PublicEvolving
public interface Model<M extends Model<M>> extends Transformer<M> {
}
Loading

0 comments on commit 3050957

Please sign in to comment.