Skip to content

Commit

Permalink
Maven Package Version Configuration Fix (feathr-ai#845)
Browse files Browse the repository at this point in the history
* move maven version config to version

Signed-off-by: Yuqing Wei <[email protected]>

* rename functions and variables

Signed-off-by: Yuqing Wei <[email protected]>

* update doc

Signed-off-by: Yuqing Wei <[email protected]>

* update doc

Signed-off-by: Yuqing Wei <[email protected]>

Signed-off-by: Yuqing Wei <[email protected]>
  • Loading branch information
Yuqing-cat authored Nov 10, 2022
1 parent 461f587 commit 2e32e88
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 64 deletions.
106 changes: 56 additions & 50 deletions docs/how-to-guides/feathr-configuration-and-env.md

Large diffs are not rendered by default.

5 changes: 0 additions & 5 deletions feathr_project/feathr/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,6 @@
TYPEDEF_ARRAY_DERIVED_FEATURE=f"array<feathr_derived_feature_{REGISTRY_TYPEDEF_VERSION}>"
TYPEDEF_ARRAY_ANCHOR_FEATURE=f"array<feathr_anchor_feature_{REGISTRY_TYPEDEF_VERSION}>"

# Decouple Feathr MAVEN Version from Feathr Python SDK Version
import os
from feathr.version import __version__
FEATHR_MAVEN_VERSION = os.environ.get("FEATHR_MAVEN_VERSION", __version__)
FEATHR_MAVEN_ARTIFACT=f"com.linkedin.feathr:feathr_2.12:{FEATHR_MAVEN_VERSION}"

JOIN_CLASS_NAME="com.linkedin.feathr.offline.job.FeatureJoinJob"
GEN_CLASS_NAME="com.linkedin.feathr.offline.job.FeatureGenJob"
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from databricks_cli.runs.api import RunsApi
from databricks_cli.sdk.api_client import ApiClient
from feathr.constants import *
from feathr.version import get_maven_artifact_fullname
from feathr.spark_provider._abc import SparkJobLauncher
from loguru import logger
from requests.structures import CaseInsensitiveDict
Expand Down Expand Up @@ -166,8 +167,8 @@ def submit_feathr_job(self, job_name: str, main_jar_path: str, main_class_name:

# the feathr main jar file is anyway needed regardless it's pyspark or scala spark
if not main_jar_path:
logger.info(f"Main JAR file is not set, using default package '{FEATHR_MAVEN_ARTIFACT}' from Maven")
submission_params['libraries'][0]['maven'] = { "coordinates": FEATHR_MAVEN_ARTIFACT }
logger.info(f"Main JAR file is not set, using default package '{get_maven_artifact_fullname()}' from Maven")
submission_params['libraries'][0]['maven'] = { "coordinates": get_maven_artifact_fullname() }
else:
submission_params['libraries'][0]['jar'] = self.upload_or_get_cloud_path(main_jar_path)
# see here for the submission parameter definition https://docs.microsoft.com/en-us/azure/databricks/dev-tools/api/2.0/jobs#--request-structure-6
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from loguru import logger
from pyspark import *

from feathr.constants import FEATHR_MAVEN_ARTIFACT
from feathr.version import get_maven_artifact_fullname
from feathr.spark_provider._abc import SparkJobLauncher


Expand Down Expand Up @@ -77,7 +77,7 @@ def submit_feathr_job(

# Get conf and package arguments
cfg = configuration.copy() if configuration else {}
maven_dependency = f"{cfg.pop('spark.jars.packages', self.packages)},{FEATHR_MAVEN_ARTIFACT}"
maven_dependency = f"{cfg.pop('spark.jars.packages', self.packages)},{get_maven_artifact_fullname()}"
spark_args = self._init_args(job_name=job_name, confs=cfg)

if not main_jar_path:
Expand All @@ -86,7 +86,7 @@ def submit_feathr_job(
# This is a JAR job
# Azure Synapse/Livy doesn't allow JAR job starts from Maven directly, we must have a jar file uploaded.
# so we have to use a dummy jar as the main file.
logger.info(f"Main JAR file is not set, using default package '{FEATHR_MAVEN_ARTIFACT}' from Maven")
logger.info(f"Main JAR file is not set, using default package '{get_maven_artifact_fullname()}' from Maven")
# Use the no-op jar as the main file
# This is a dummy jar which contains only one `org.example.Noop` class with one empty `main` function
# which does nothing
Expand Down
7 changes: 4 additions & 3 deletions feathr_project/feathr/spark_provider/_synapse_submission.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

from feathr.spark_provider._abc import SparkJobLauncher
from feathr.constants import *
from feathr.version import get_maven_artifact_fullname

class LivyStates(Enum):
""" Adapt LivyStates over to relax the dependency for azure-synapse-spark pacakge.
Expand Down Expand Up @@ -114,12 +115,12 @@ def submit_feathr_job(self, job_name: str, main_jar_path: str = None, main_clas
if not main_jar_path:
# We don't have the main jar, use Maven
# Add Maven dependency to the job configuration
logger.info(f"Main JAR file is not set, using default package '{FEATHR_MAVEN_ARTIFACT}' from Maven")
logger.info(f"Main JAR file is not set, using default package '{get_maven_artifact_fullname()}' from Maven")
if "spark.jars.packages" in cfg:
cfg["spark.jars.packages"] = ",".join(
[cfg["spark.jars.packages"], FEATHR_MAVEN_ARTIFACT])
[cfg["spark.jars.packages"], get_maven_artifact_fullname()])
else:
cfg["spark.jars.packages"] = FEATHR_MAVEN_ARTIFACT
cfg["spark.jars.packages"] = get_maven_artifact_fullname()

if not python_files:
# This is a JAR job
Expand Down
11 changes: 10 additions & 1 deletion feathr_project/feathr/version.py
Original file line number Diff line number Diff line change
@@ -1 +1,10 @@
__version__ = "0.9.0-rc2"
import os

# Single source of truth for the Feathr Python SDK version.
__version__ = "0.9.0-rc2"

def get_version():
    """Return the Feathr Python SDK version string."""
    return __version__

def get_maven_artifact_fullname():
    """Return the full Maven coordinate for the Feathr Scala artifact.

    The Maven artifact version is decoupled from the Python SDK version:
    it can be overridden via the ``MAVEN_ARTIFACT_VERSION`` environment
    variable (read at call time, so changes take effect without reimport);
    otherwise it falls back to ``__version__``.

    Returns:
        str: Coordinate in ``group:artifact:version`` form, e.g.
        ``com.linkedin.feathr:feathr_2.12:0.9.0-rc2``.
    """
    maven_artifact_version = os.environ.get("MAVEN_ARTIFACT_VERSION", __version__)
    return f"com.linkedin.feathr:feathr_2.12:{maven_artifact_version}"

0 comments on commit 2e32e88

Please sign in to comment.