usage monitoring app
sfc-gh-asaini committed Jun 10, 2024
1 parent c14f1d0 commit a56965c
Showing 5 changed files with 304 additions and 0 deletions.
79 changes: 79 additions & 0 deletions Streamlit in Snowflake Usage Monitoring/README.md
@@ -0,0 +1,79 @@
![](../shared_assets/sis-header.jpeg)

# Streamlit in Snowflake Usage Monitoring

This app helps ACCOUNTADMINs monitor the usage of Streamlit in Snowflake apps. It uses the [Account Usage](https://docs.snowflake.com/en/sql-reference/account-usage) schema to query historical usage data for your account.


## Prerequisites

The App Owner Role must have SELECT privileges on the [Query History view](https://docs.snowflake.com/en/sql-reference/account-usage/query_history).
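
One way to satisfy this is to grant the app owner role read access to the shared `SNOWFLAKE` database, which includes the `ACCOUNT_USAGE` schema. A minimal sketch; `app_owner_role` is a placeholder for your actual role name:

```sql
-- Run as ACCOUNTADMIN (or another role allowed to grant this privilege)
grant imported privileges on database snowflake to role app_owner_role;
```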


## App Layout

This app contains two pages:

1. Account Level Usage
2. App Level Usage


### Account Level Usage

![](./assets/account_level.png)

### App Level Usage

![](./assets/app_level.png)

### Raw Data

The app also displays the raw data powering the charts above.


## Streamlit Execution Metadata

All Streamlit in Snowflake app executions are recorded in the [Query History view](https://docs.snowflake.com/en/sql-reference/account-usage/query_history). You can view them in Snowsight under `Monitoring` -> `Query History`. For example, each Streamlit app execution shows up as a query of the form:

```sql
execute streamlit "ST_TEST_DB"."DEV"."KP1V05SACRMOJ9PX"()
```

You can also run a SQL query in a Worksheet to obtain the same information.

```sql
select
    user_name as User,
    total_elapsed_time/60000 as Minutes_Used,
    date(start_time) as Date,
    try_parse_json(query_tag):StreamlitName as AppName,
    query_text
from snowflake.monitoring.query_history
where
    -- Enter a date in the line below
    start_time >= '{start_date}'
    and try_parse_json(query_tag):StreamlitEngine = 'ExecuteStreamlit'
    and try_parse_json(query_tag):ChildQuery IS NULL
    and contains(query_text, 'execute streamlit')
order by Date desc;
```

All Streamlit in Snowflake app executions contain a `QUERY_TAG`, for example:

```json
{
"StreamlitEngine":"ExecuteStreamlit",
"StreamlitName":"ST_TEST_DB.DEV.KP1V05SACRMOJ9PX"
}
```

Any child query executed by the app also contains an additional field in its `QUERY_TAG` indicating that it is a child query:

```json
{
"StreamlitEngine":"ExecuteStreamlit",
"StreamlitName":"ST_TEST_DB.DEV.KP1V05SACRMOJ9PX",
"ChildQuery":true
}
```
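
Because child queries are tagged this way, you can also gauge how much query traffic each app generates. A rough sketch, reusing the same view and `QUERY_TAG` fields as the query above (the 7-day window is only an example):

```sql
select
    try_parse_json(query_tag):StreamlitName as AppName,
    count(*) as Child_Queries
from snowflake.monitoring.query_history
where start_time >= dateadd('day', -7, current_timestamp())
    and try_parse_json(query_tag):StreamlitEngine = 'ExecuteStreamlit'
    and try_parse_json(query_tag):ChildQuery is not null
group by 1
order by Child_Queries desc;
```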

(Two binary image assets referenced in the README, `assets/account_level.png` and `assets/app_level.png`, were added in this commit but cannot be displayed here.)
117 changes: 117 additions & 0 deletions Streamlit in Snowflake Usage Monitoring/pages/app_level_metrics.py
@@ -0,0 +1,117 @@
# Import python packages
import streamlit as st
from snowflake.snowpark.context import get_active_session
import datetime
import altair as alt
import pandas as pd


today = datetime.date.today()
yesterday = today - datetime.timedelta(days=1)
week_ago = today - datetime.timedelta(days=7)
last_30_days = today - datetime.timedelta(days=30)


# Write directly to the app
st.title("App Level Usage Monitoring :balloon:")
st.write(
"""Metrics about the usage of a specific app in your account
"""
)

def generate_sql(start_date):
    # Build a Query History query for all app executions since start_date,
    # excluding child queries issued by the apps themselves
    sql = f"""
    select
        user_name as User,
        total_elapsed_time/60000 as Minutes_Used,
        date(start_time) as Date,
        try_parse_json(query_tag):StreamlitName as AppName,
        query_text
    from snowflake.monitoring.query_history
    where start_time >= '{start_date}'
        and try_parse_json(query_tag):StreamlitEngine = 'ExecuteStreamlit'
        and try_parse_json(query_tag):ChildQuery IS NULL
        and contains(query_text, 'execute streamlit')
    order by Date desc;
    """
    return sql


@st.cache_data
def query_sf(sql):
    # Run the query in Snowflake and return the result as a pandas DataFrame
    df = session.sql(sql).to_pandas()
    return df


def generate_bar_chart(views_over_time):
    views_over_time.index = pd.to_datetime(views_over_time.index)  # Convert index to datetime

    # Convert Series to DataFrame for Altair
    df = views_over_time.reset_index()
    df.columns = ['Date', 'Value']  # Rename columns for clarity

    # Create bar chart
    chart = alt.Chart(df).mark_bar().encode(
        x=alt.X('Date:T', title='Date'),
        y=alt.Y('Value:Q', title='Number of Views'),
        tooltip=['Date', 'Value']  # Add tooltips to display the date and value on hover
    ).properties(
        width=600,
        height=400
    )

    st.altair_chart(chart)


@st.cache_data
def calculate_metrics(df):
    # Render summary stats, a views-over-time chart, and the raw data for the given executions
    num_users = df['USER'].nunique()
    num_apps = df['APPNAME'].nunique()
    num_views = len(df.index)

    st.subheader('Summary Stats')
    col1, col2, col3 = st.columns(3)

    with col1:
        st.metric(label='Unique Users', value=num_users)

    with col2:
        st.metric(label='Apps Used', value=num_apps)

    with col3:
        st.metric(label='Total Views', value=num_views)

    st.subheader('Views over time')
    views_over_time = df.groupby(['DATE']).size()
    generate_bar_chart(views_over_time)

    st.subheader('Raw Data')
    st.dataframe(df)


session = get_active_session()

# Populate the app picker from the last 30 days of executions
sql = generate_sql(last_30_days)
df = query_sf(sql)
list_of_apps = df['APPNAME'].unique()

app = st.selectbox('Choose App Name', list_of_apps)

tab_yesterday, tab_last_week, tab_last_30_days = st.tabs(["Last Day", "Last Week", "Last 30 Days"])

with tab_yesterday:
    sql = generate_sql(yesterday)
    df = query_sf(sql)
    df2 = df[df['APPNAME'] == app]
    calculate_metrics(df2)

with tab_last_week:
    sql = generate_sql(week_ago)
    df = query_sf(sql)
    df2 = df[df['APPNAME'] == app]
    calculate_metrics(df2)

with tab_last_30_days:
    sql = generate_sql(last_30_days)
    df = query_sf(sql)
    df2 = df[df['APPNAME'] == app]
    calculate_metrics(df2)
108 changes: 108 additions & 0 deletions Streamlit in Snowflake Usage Monitoring/streamlit_app.py
@@ -0,0 +1,108 @@
# Import python packages
import streamlit as st
from snowflake.snowpark.context import get_active_session
import datetime
import altair as alt
import pandas as pd


today = datetime.date.today()
yesterday = today - datetime.timedelta(days=1)
week_ago = today - datetime.timedelta(days=7)
last_30_days = today - datetime.timedelta(days=30)


# Write directly to the app
st.title("Streamlit in Snowflake Usage :balloon:")
st.write(
"""Metrics about the overall Streamlit in Snowflake usage in your account
"""
)

def generate_sql(start_date):
    # Build a Query History query for all app executions since start_date,
    # excluding child queries issued by the apps themselves
    sql = f"""
    select
        user_name as User,
        total_elapsed_time/60000 as Minutes_Used,
        date(start_time) as Date,
        try_parse_json(query_tag):StreamlitName as AppName,
        query_text
    from snowflake.monitoring.query_history
    where start_time >= '{start_date}'
        and try_parse_json(query_tag):StreamlitEngine = 'ExecuteStreamlit'
        and try_parse_json(query_tag):ChildQuery IS NULL
        and contains(query_text, 'execute streamlit')
    order by Date desc;
    """
    return sql


@st.cache_data
def query_sf(sql):
    # Run the query in Snowflake and return the result as a pandas DataFrame
    df = session.sql(sql).to_pandas()
    return df


def generate_bar_chart(views_over_time):
    views_over_time.index = pd.to_datetime(views_over_time.index)  # Convert index to datetime

    # Convert Series to DataFrame for Altair
    df = views_over_time.reset_index()
    df.columns = ['Date', 'Value']  # Rename columns for clarity

    # Create bar chart
    chart = alt.Chart(df).mark_bar().encode(
        x=alt.X('Date:T', title='Date'),
        y=alt.Y('Value:Q', title='Number of Views'),
        tooltip=['Date', 'Value']  # Add tooltips to display the date and value on hover
    ).properties(
        width=600,
        height=400
    )

    st.altair_chart(chart)


@st.cache_data
def calculate_metrics(df):
    # Render summary stats, a views-over-time chart, and the raw data for the given executions
    num_users = df['USER'].nunique()
    num_apps = df['APPNAME'].nunique()
    num_views = len(df.index)

    st.subheader('Summary Stats')
    col1, col2, col3 = st.columns(3)

    with col1:
        st.metric(label='Unique Users', value=num_users)

    with col2:
        st.metric(label='Apps Used', value=num_apps)

    with col3:
        st.metric(label='Total Views', value=num_views)

    st.subheader('Views over time')
    views_over_time = df.groupby(['DATE']).size()
    generate_bar_chart(views_over_time)

    st.subheader('Raw Data')
    st.dataframe(df)


session = get_active_session()

tab_yesterday, tab_last_week, tab_last_30_days = st.tabs(["Last Day", "Last Week", "Last 30 Days"])

with tab_yesterday:
    sql = generate_sql(yesterday)
    df = query_sf(sql)
    calculate_metrics(df)

with tab_last_week:
    sql = generate_sql(week_ago)
    df = query_sf(sql)
    calculate_metrics(df)

with tab_last_30_days:
    sql = generate_sql(last_30_days)
    df = query_sf(sql)
    calculate_metrics(df)
