forked from byzer-org/byzer-lang
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request byzer-org#1322 from allwefantasy/TRY
Try
- Loading branch information
Showing
5 changed files
with
300 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
# 制作一张动态报表 | ||
|
||
利用前面我们获得的数据,现在可以使用python绘制报表了: | |
|
||
```sql | ||
load delta.`python_data.vega_datasets` as vega_datasets; | ||
|
||
!python env "PYTHON_ENV=source activate dev"; | ||
!python conf "schema=st(field(content,string))"; | ||
|
||
select * from vega_datasets where Year > 1990 as dash_data; | ||
|
||
!ray on dash_data ''' | ||
import pandas as pd | ||
import plotly.express as px | ||
from plotly.io import to_html | ||
from vega_datasets import data | ||
from pyjava.api.mlsql import PythonContext,RayContext | ||
ray_context = RayContext.connect(globals(),None) | ||
data = list(ray_context.collect()) | ||
df = pd.DataFrame(data, columns=data[0].keys()) | ||
fig = px.bar(df, | ||
y="Entity", | ||
x="Deaths", | ||
animation_frame="Year", | ||
orientation='h', | ||
range_x=[0, df.Deaths.max()], | ||
color="Entity") | ||
# improve aesthetics (size, grids etc.) | ||
fig.update_layout(width=1000, | ||
height=800, | ||
xaxis_showgrid=False, | ||
yaxis_showgrid=False, | ||
paper_bgcolor='rgba(0,0,0,0)', | ||
plot_bgcolor='rgba(0,0,0,0)', | ||
title_text='Evolution of Natural Disasters', | ||
showlegend=False) | ||
fig.update_xaxes(title_text='Number of Deaths') | ||
fig.update_yaxes(title_text='') | ||
html = to_html( | ||
fig, | ||
config={}, | ||
auto_play=False, | ||
include_plotlyjs=True, | ||
include_mathjax="cdn", | ||
post_script=None, | ||
full_html=True, | ||
animation_opts=None, | ||
default_width="50%", | ||
default_height="50%", | ||
validate=False, | ||
) | ||
context.build_result([{"content":html}]) | ||
''' named mlsql_temp_table2; | ||
|
||
select content as html,"" as dash from mlsql_temp_table2 as output; | ||
``` | ||
|
||
渲染结果如下: | ||
|
||
![](http://docs.mlsql.tech/upload_images/WechatIMG80.png) | ||
|
||
因为我们并不需要真的去连接一个Ray集群,而是直接在client端执行,所以可以将Ray的URL地址设置为 | |
None: | |
|
||
```python | ||
ray_context = RayContext.connect(globals(),None) | ||
``` | ||
|
||
我们可以通过如下代码获取表dash_data所有的数据: | ||
|
||
```python | ||
data = list(ray_context.collect()) | ||
``` | ||
|
||
`ray_context.collect()` 返回的是generator,所以需要转化为一个list来使用。 | ||
|
||
最后,绘制的图被渲染成html,我们需要将值回传出去: | ||
|
||
```python | ||
context.build_result([{"content":html}]) | ||
``` | ||
|
||
build_result接受一个list,里面是一个map. | ||
|
||
为了能够让MLSQL Console进行渲染,我们需要使用如下的语句: | ||
|
||
```sql | ||
select content as html,"" as dash from mlsql_temp_table2 as output; | ||
``` | ||
|
||
核心是两个字段:一个标记字段dash,表示这个表可以被渲染成图表;一个内容字段html,表示把内容直接当做html进行渲染。 | |
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,62 @@ | ||
# 把python测试数据集导出到数仓 | ||
|
||
实际场景,我们的数据一般都是会在数仓里。为了方便,同时模拟这个实际情况, | ||
实际场景,我们的数据一般都是会在数仓里。为了模拟这个实际情况,这篇内容会介绍如何将python的数据 | ||
导入到Hive仓库里。 | ||
|
||
完整脚本如下: | ||
|
||
``` | ||
!python env "PYTHON_ENV=source activate dev"; | ||
!python conf "schema=st(field(Entity,string),field(Year,long),field(Deaths,long))"; | ||
!python conf "dataMode=model"; | ||
!ray on command ''' | ||
import plotly.express as px | ||
from plotly.io import to_html | ||
from vega_datasets import data | ||
df = data.disasters() | ||
context.set_output([[df[name] for name in df]]) | ||
''' named mlsql_temp_table2; | ||
save overwrite mlsql_temp_table2 as delta.`python_data.vega_datasets`; | ||
``` | ||
|
||
简单做个说明,前三行主要是配置Python的一些参数,其中第二行是数据的格式描述(schema)。为了获知vega_datasets的数据结构, | |
用户可以在MLSQL Console里新建一个python脚本,比如叫`vega_datasets.py`,然后执行如下代码: | |
|
||
``` | ||
import plotly.express as px | |
from plotly.io import to_html | |
from vega_datasets import data | |
df = data.disasters() | |
print(df.iloc[0]) | ||
``` | ||
|
||
会输出如下内容: | ||
|
||
``` | ||
Entity All natural disasters | |
Year 1900 | |
Deaths 1267360 | |
Name: 0, dtype: object | |
``` | ||
|
||
这样我们就知道数据结构,从而写出schema了。接着我们使用内嵌python代码获得数据,其中 | ||
|
||
`!ray on command` 中的command表示我们并不需要一个真实的数据集作为输入。 | |
|
||
而最后设置输出的代码: | ||
|
||
```python | ||
context.set_output([[df[name] for name in df]]) | ||
``` | ||
|
||
可以看到我们需要拿到df的所有列得到一个数组,然后再套一个数组即可。这是典型的列式传输模式。 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
# 制作一张交互式报表 | ||
|
||
完整代码如下: | ||
|
||
```sql | ||
|
||
load delta.`python_data.vega_datasets` as vega_datasets; | ||
|
||
!python env "PYTHON_ENV=source activate dev"; | ||
!python conf "schema=st(field(content,string))"; | ||
|
||
select * from vega_datasets where Year > 1990 as dash_data; | ||
|
||
!ray on dash_data ''' | ||
import pandas as pd | ||
import plotly.express as px | ||
from plotly.io import to_html | ||
from vega_datasets import data | ||
from pyjava.api.mlsql import PythonContext,RayContext | ||
import dash | ||
import dash_core_components as dcc | ||
import dash_html_components as html | ||
import plotly.express as px | ||
import ray | ||
from dash.dependencies import Input, Output | ||
ray_context = RayContext.connect(globals(),"192.168.209.29:42207") | ||
data = list(ray_context.collect()) | ||
APP_NAME="jack" | ||
APP_PORT="8051" | ||
@ray.remote | ||
class DashServer(object): | ||
def __init__(self,port): | ||
self.port = port | ||
self.app_dash = dash.Dash(APP_NAME, external_stylesheets=["https://codepen.io/chriddyp/pen/bWLwgP.css"]) | ||
def shutdown(self): | ||
ray.actor.exit_actor() | ||
def get_address(self): | ||
return ray.services.get_node_ip_address() | ||
def start_server(self): | ||
from flask import request | ||
tips = px.data.tips() | ||
col_options = [dict(label=x, value=x) for x in tips.columns] | ||
dimensions = ["x", "y", "color", "facet_col", "facet_row"] | ||
app = self.app_dash | ||
app.layout = html.Div( | ||
[ | ||
html.H1("Demo: Plotly Express in Dash with Tips Dataset"), | ||
html.Div( | ||
[ | ||
html.P([d + ":", dcc.Dropdown(id=d, options=col_options)]) | ||
for d in dimensions | ||
], | ||
style={"width": "25%", "float": "left"}, | ||
), | ||
dcc.Graph(id="graph", style={"width": "75%", "display": "inline-block"}), | ||
] | ||
) | ||
@app.callback(Output("graph", "figure"), [Input(d, "value") for d in dimensions]) | ||
def make_figure(x, y, color, facet_col, facet_row): | ||
return px.scatter( | ||
tips, | ||
x=x, | ||
y=y, | ||
color=color, | ||
facet_col=facet_col, | ||
facet_row=facet_row, | ||
height=700, | ||
) | ||
def shutdown_server(): | ||
func = request.environ.get('werkzeug.server.shutdown') | ||
if func is None: | ||
raise RuntimeError('Not running with the Werkzeug Server') | ||
func() | ||
@app.server.route('/shutdown', methods=['GET', 'POST']) | ||
def shutdown(): | ||
shutdown_server() | ||
return 'Server shutting down...' | ||
print(self.get_address()) | ||
app.run_server(self.get_address(),self.port) | ||
def actor_exits(name): | ||
try: | ||
ray.experimental.get_actor(name) | ||
exists = True | ||
except ValueError: | ||
exists = False | ||
return exists | ||
if actor_exits(APP_NAME): | ||
dash_server = ray.experimental.get_actor(APP_NAME) | ||
try: | ||
ray.get(dash_server.shutdown.remote()) | ||
except Exception: | ||
pass | ||
from ray.experimental.named_actors import _calculate_key | ||
worker = ray.worker.get_global_worker() | ||
worker.redis_client.delete(_calculate_key(APP_NAME)) | ||
dash_server = DashServer.options(name=APP_NAME, detached=True, max_concurrency=2).remote(APP_PORT) | ||
host = ray.get(dash_server.get_address.remote()) | ||
dash_server.start_server.remote() | ||
context.build_result([{"content":"http://{}:{}".format(host,APP_PORT)}]) | ||
''' named mlsql_temp_table2; | ||
|
||
select content as url,"" as dash from mlsql_temp_table2 as output; | ||
``` | ||
|
||
这段代码详细地展示了MLSQL如何结合Ray + Dash + Plotly 制作一张交互式报表。最后展示在MLSQL | |
Console里的是一个URL地址,访问该地址,得到如下报表: | |
|
||
![](http://docs.mlsql.tech/upload_images/WX20200414-124022.png) | ||
|
||
我们可以自己动态选择一些参数从而将数据渲染成不同的形态。 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters