Server_streamlit.py
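"""Streamlit front end for the comment-analysis project.

A sidebar radio switches between the app's pages (DASHBOARD, DATA COLLECTION,
DATA PROCESSING, DATA ANALYSIS, END TO END PROCESSING, DATA STREAMING); each
page uses a hydralit_components nav bar to pick its data source or sub-task and
delegates to the project modules (CollectData, CleanData, PredictData,
VisualizeData, end_to_end_streamlit, test_pymongo, processing_db).

Run with:  streamlit run Server_streamlit.py

Note: the session-state helpers below rely on Streamlit-internal APIs
(legacy_caching, scriptrunner, server), so this script needs an older Streamlit
release that still ships those modules.
"""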
import streamlit as st
import pandas as pd
import numpy as np
import hydralit_components as hc

# Streamlit-internal APIs used by the session-state helpers below; these import
# paths exist only in older Streamlit releases (moved or removed in later versions).
from streamlit.legacy_caching.hashing import _CodeHasher
from streamlit.scriptrunner import get_script_run_ctx
from streamlit.server.server import Server

# Project modules
from PredictData import streamlit_predict_emotion
from CollectData import streamlit_crawl_data
from CleanData import streamlit_clean_data
from VisualizeData import streamlit_visualize_data
from end_to_end_streamlit import end_to_end_process
import test_pymongo
import processing_db
menu_data = [
    {'label': "YOUTUBE"},
    {'label': "SHOPEE"},
    {'label': "TRAVELOKA"},
    {'label': "MONGODB"},
    {'label': "SQLITE"},
]
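
# hc.nav_bar renders these entries as a horizontal navigation bar and returns the
# selected entry's id (here simply the label, since no explicit 'id' is given);
# the page functions below use that return value for routing.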
class _SessionState:

    def __init__(self, session, hash_funcs):
        """Initialize SessionState instance."""
        self.__dict__["_state"] = {
            "data": {},
            "hash": None,
            "hasher": _CodeHasher(hash_funcs),
            "is_rerun": False,
            "session": session,
        }

    def __call__(self, **kwargs):
        """Initialize state data once."""
        for item, value in kwargs.items():
            if item not in self._state["data"]:
                self._state["data"][item] = value

    def __getitem__(self, item):
        """Return a saved state value, None if item is undefined."""
        return self._state["data"].get(item, None)

    def __getattr__(self, item):
        """Return a saved state value, None if item is undefined."""
        return self._state["data"].get(item, None)

    def __setitem__(self, item, value):
        """Set state value."""
        self._state["data"][item] = value

    def __setattr__(self, item, value):
        """Set state value."""
        self._state["data"][item] = value

    def clear(self):
        """Clear session state and request a rerun."""
        self._state["data"].clear()
        self._state["session"].request_rerun()

    def sync(self):
        """Rerun the app with all state values up to date from the beginning to fix rollbacks."""
        # Ensure to rerun only once to avoid infinite loops
        # caused by a constantly changing state value at each run.
        #
        # Example: state.value += 1
        if self._state["is_rerun"]:
            self._state["is_rerun"] = False
        elif self._state["hash"] is not None:
            if self._state["hash"] != self._state["hasher"].to_bytes(self._state["data"], None):
                self._state["is_rerun"] = True
                self._state["session"].request_rerun()

        self._state["hash"] = self._state["hasher"].to_bytes(self._state["data"], None)
def _get_session():
    """Return the Streamlit session object for the current script run (uses Streamlit internals)."""
    session_id = get_script_run_ctx().session_id
    session_info = Server.get_current()._get_session_info(session_id)
    if session_info is None:
        raise RuntimeError("Couldn't get your Streamlit Session object.")
    return session_info.session


def _get_state(hash_funcs=None):
    """Return the per-session _SessionState, creating it on first access."""
    session = _get_session()
    if not hasattr(session, "_custom_session_state"):
        session._custom_session_state = _SessionState(session, hash_funcs)
    return session._custom_session_state
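
# Minimal usage sketch for the helpers above (mirrors how main() uses them):
#
#   state = _get_state()   # one _SessionState object per browser session
#   state(counter=0)       # __call__ seeds defaults only if the key is missing
#   state.counter += 1     # attribute/item access reads and writes the state dict
#   state.sync()           # call last; triggers a single rerun if the state changed mid-run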
def page_crawling(state):
    menu_id = hc.nav_bar(menu_definition=menu_data)
    if menu_id == "YOUTUBE":
        streamlit_crawl_data.YoutubeCrawling_V1()
    elif menu_id == "SHOPEE":
        streamlit_crawl_data.ShopeeCrawling()
    elif menu_id == "TRAVELOKA":
        streamlit_crawl_data.TravelokaCrawling()
    elif menu_id == "MONGODB":
        test_pymongo.collect_mongoDB()
    else:
        processing_db.collect_sqlite()
def page_dashboard(state):
    st.header(":mag_right: DATA COLLECTION")
    st.subheader("**This section collects data from YouTube**")
    # st.subheader("**YOUTUBE CRAWL DATA INSTRUCTION** ")
    st.write(":point_right: Copy your YouTube video's URL and paste it into the **INPUT YOUTUBE URL** field.")
    st.write(":point_right: Then type the number of scrolls you want the tool to perform.")
    st.write(":point_right: If you want to save your data to MongoDB through the tool, choose **Y** in the **SAVE MONGO_DB (Y/N)** field, otherwise choose **N**.")
    st.write(":point_right: If you want to save your data to SQLITE through the tool, choose **Y** in the **SAVE SQLITE (Y/N)** field, otherwise choose **N**.")
    st.write(":arrow_down: You can download the file through the **Click Here!!!** option.")
    st.image("dashboard/collect.png")
    st.write("**:exclamation::exclamation::exclamation: Moreover, we plan to extend this tool to two more pages, Shopee and Traveloka, but bugs still appear because our code is not yet compatible with those pages' source code.**")
    # st.image("crawl_data_utube.png", width=350)
    st.write("---")
    st.subheader(":bulb: Advantages of MongoDB and SQLITE for the tool")
    st.write("When the user chooses to save the data file to **MongoDB** or **SQLITE**, the file is stored in that database. For a later step such as **DATA PROCESSING**, the cleaned data file can then be loaded as input from the **MONGODB** or **SQLITE** tab of each section (some sections do not have these tabs yet).")
    st.markdown("---")
st.header(":hourglass_flowing_sand: DATA PROCESSING")
st.subheader("This section cleans data from the file, then chooses 3 columns user_name, time_post, comment_post to create a new data file begin with CLEAN.")
st.write(":one: Upload your .csv file from your computer through **Browse files** button.")
st.write(":two: Then choose to save on MongoDB or not.")
st.write(":three: Then choose to save on SQLITE or not.")
st.write(":four: Finally click in the **PROCESSING** button.")
st.write(":five: After processing, You can download the file through the **CLICK HERE** option")
st.image("dashboard/processing.png")
st.markdown("--")
st.header(":bar_chart: DATA ANALYSIS")
st.subheader("This section has 2 tabs: PREDICT EMOTION and VISUALIZE.")
st.write(":small_blue_diamond: **PREDICT EMOTION** This tab will predict your data file base on 2 models: Naive Bayes and Bert")
st.write(":zap: First, you need to upload your data file which is cleaned.")
st.write(":zap: Second, choosing the model you want.")
st.write(":zap: Then choose to save your output data file after process to MongoDB or SQLITE or not.")
st.write(":arrow_down: After clicking the **process** button, the tool will predict your data file base on the model you have chosen then store the data to a new file begin with **PREDICT**. You can download this file such as **CLEAN** file")
st.image("dashboard/predict.png")
st.write(":small_blue_diamond: **VISUALIZE** This tab will visualize your data file through chart.")
st.write(":chart_with_upwards_trend: Just upload your data file then let the tool do the remaining!")
st.image("dashboard/visualize.png")
st.markdown("--")
st.header(":triangular_flag_on_post: END TO END PROCESSING")
st.subheader("This section will do all function from the begining to the end in order: CLEAN DATA - PREDICT DATA - VISUALIZE")
st.write(":low_brightness: You just need to upload your raw data file then the tool will **clean** it then **predict** and finally **visualize** the result data.")
st.image("dashboard/endtoend.png")
st.markdown("--")
st.header(":hourglass: DATA STREAMING")
def page_clean(state):
    # st.write("CLEAN DATA")
    menu_clean = [
        {'label': "CSV"},
        {'label': "MONGODB"},
        {'label': "SQLITE"},
    ]
    menu_id = hc.nav_bar(menu_definition=menu_clean)
    if menu_id == "CSV":
        streamlit_clean_data.processingCSV()
    elif menu_id == "MONGODB":
        streamlit_clean_data.processingMongoDB()
    else:
        processing_db.processing_sqlite_db()
def page_output(state):
    menu_output = [
        {'label': "PREDICT EMOTION"},
        {'label': "VISUALIZE"},
    ]
    menu_id = hc.nav_bar(menu_definition=menu_output)
    if menu_id == "PREDICT EMOTION":
        streamlit_predict_emotion.predict()
    else:
        streamlit_visualize_data.visualizeDT()


def page_streaming_data(state):
    st.title("STREAMING DATA")
    st.write("=============COMING SOON=============")
def page_end_to_end(state):
    menu_end_to_end = [
        {'label': "CSV"},
        {'label': "MONGODB"},
        {'label': "SQLITE"},
    ]
    menu_id = hc.nav_bar(menu_definition=menu_end_to_end)
    if menu_id == "CSV":
        end_to_end_process()
    elif menu_id == "MONGODB":
        end_to_end_process(mode="MONGODB")
    else:
end_to_end_process(mode="TRAVELOKA")
def main():
    st.set_page_config(layout="wide", page_title="server")
    state = _get_state()

    pages = {
        "DASHBOARD": page_dashboard,
        "DATA COLLECTION": page_crawling,
        "DATA PROCESSING": page_clean,
        "DATA ANALYSIS": page_output,
        "END TO END PROCESSING": page_end_to_end,
        "DATA STREAMING": page_streaming_data,
    }

    st.sidebar.title(":floppy_disk: Dashboard")
    page = st.sidebar.radio("Select your page", tuple(pages.keys()))
    pages[page](state)
    state.sync()


if __name__ == "__main__":
    main()