Skip to content

Commit

Permalink
用户画像代码
Browse files Browse the repository at this point in the history
  • Loading branch information
HunterChao committed Apr 2, 2020
1 parent c709843 commit bf568a0
Show file tree
Hide file tree
Showing 49 changed files with 650 additions and 23 deletions.
42 changes: 20 additions & 22 deletions TFIDF/TFIDF_user_portrait_weight.sql
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

-- TF step: builds keep_zhaoht_user_tfidf_01 with the per-user weight of each tag category.
-- Joins per-user/per-category tag counts (weight_m_p) to per-user total tag counts
-- (weight_m_s) on user_id.
-- NOTE(review): this text is a rendered diff. Changed lines appear twice (the
-- gdw_working / wedw_dw form and the temp / dw form; presumably old first, then new),
-- and the "Expand All" marker stands in for collapsed lines that are not visible here.
drop table if exists gdw_working.keep_zhaoht_user_tfidf_01;
create table gdw_working.keep_zhaoht_user_tfidf_01
drop table if exists temp.keep_zhaoht_user_tfidf_01;
create table temp.keep_zhaoht_user_tfidf_01
as
select tt1.user_id,
tt1.org_id,
Expand All @@ -13,42 +13,40 @@
t1.org_id,
t1.org_name,
count(t1.org_id) weight_m_p -- number of tags of each category per user
from wedw_dw.peasona_user_tag_relation t1
from dw.peasona_user_tag_relation t1
group by 1,2,3
) tt1
left join (
select t2.user_id,
count(t2.org_id) weight_m_s -- total number of tags per user
from wedw_dw.peasona_user_tag_relation t2
from dw.peasona_user_tag_relation t2
group by 1
) tt2
on tt1.user_id = tt2.user_id


-- IDF step: builds keep_zhaoht_user_tfidf_02 with the weight of each tag category
-- across all tags. Each (org_id, org_name) row carries its own total (weight_w_p) and,
-- via a cross join against a single-row aggregate, the grand total (weight_w_s).
-- NOTE(review): rendered diff; changed lines appear in both their old and new form.
drop table if exists gdw_working.keep_zhaoht_user_tfidf_02;
create table gdw_working.keep_zhaoht_user_tfidf_02
drop table if exists temp.keep_zhaoht_user_tfidf_02;
create table temp.keep_zhaoht_user_tfidf_02
as
select tt1.org_id,
tt1.org_name,
tt1.weight_w_p,
tt2.weight_w_s
from (
select t1.org_id,
from ( select t1.org_id,
t1.org_name,
cast(sum(weight_m_p) as int) as weight_w_p -- total count of this tag category
from gdw_working.keep_zhaoht_user_tfidf_01 t1
from temp.keep_zhaoht_user_tfidf_01 t1
group by 1,2
) tt1
cross join (
select sum(t2.weight_m_p) as weight_w_s -- total count of all tags overall
from gdw_working.keep_zhaoht_user_tfidf_01 t2
cross join (select sum(t2.weight_m_p) as weight_w_s -- total count of all tags overall
from temp.keep_zhaoht_user_tfidf_01 t2
) tt2


-- TF-IDF step: per-user tag weight (with this table, the TF-IDF weight of every tag
-- on every user is fully computed).
-- ratio = (weight_m_p / weight_m_s) * log10(weight_w_s / weight_w_p), i.e. the user's
-- share of tags in this category times the log of (all tags / tags in this category).
-- NOTE(review): weight_m_p and weight_m_s come from count(), and weight_w_p is cast to
-- int; confirm the target engine's "/" returns a fractional result for these operands.
-- NOTE(review): the join key is org_id only, while table 02 is grouped by org_id and
-- org_name; confirm org_id maps to a single org_name, otherwise rows are duplicated.
-- NOTE(review): rendered diff; "Expand All" hides collapsed lines, and changed lines
-- appear in both old and new form.
drop table if exists gdw_working.keep_zhaoht_user_tfidf_03;
create table gdw_working.keep_zhaoht_user_tfidf_03
drop table if exists temp.keep_zhaoht_user_tfidf_03;
create table temp.keep_zhaoht_user_tfidf_03
as
select t1.user_id,
t1.org_id,
Expand All @@ -60,16 +58,16 @@
--t3.weight_w_p,
--t3.weight_w_s,
(t1.weight_m_p/t1.weight_m_s)*(log10(t2.weight_w_s/t2.weight_w_p)) as ratio -- TF-IDF weight of each tag for each user
from gdw_working.keep_zhaoht_user_tfidf_01 t1 -- user tag table
left join gdw_working.keep_zhaoht_user_tfidf_02 t2
from temp.keep_zhaoht_user_tfidf_01 t1 -- user tag table
left join temp.keep_zhaoht_user_tfidf_02 t2
on t1.org_id = t2.org_id





-- User tag weight = action time decay * action weight * action count * tag TF-IDF
-- NOTE(review): unlike tables 01-03, no "drop table if exists" precedes this create,
-- so a rerun would presumably fail if the table already exists; confirm intent.
-- NOTE(review): the where clause filters on t2.plan_id, which discards rows that found
-- no match in t2, so the left join to t2 effectively behaves as an inner join.
-- NOTE(review): the lookback window is anchored to the hard-coded date '2017-07-20'.
-- NOTE(review): rendered diff; "Expand All" hides collapsed lines, and changed lines
-- appear in both old and new form.
create table gdw_working.keep_zhaoht_user_tfidf_04
create table temp.keep_zhaoht_user_tfidf_04
as
select tt1.user_id,
tt1.org_id,
Expand All @@ -92,13 +90,13 @@
when t2.is_time_reduce = 0 then
cast(t2.act_weight as float) * cast(t1.cnt as int) * t3.ratio
end as act_weight
from wedw_dw.peasona_user_tag_relation t1
left join wedw_dim.peasona_user_act_weight_manual t2
from dw.peasona_user_tag_relation t1
left join dim.peasona_user_act_weight_manual t2
on t1.act_type_id = t2.act_type_id
left join gdw_working.keep_zhaoht_user_tfidf_03 t3
left join temp.keep_zhaoht_user_tfidf_03 t3
on t1.user_id = t3.user_id and t1.org_id = t3.org_id
where t1.date_id >= DATE_SUB('2017-07-20', 365)
and t2.plan_id = 1 -- plan 1, labelled "Yiyantang" in the original comment
and t2.plan_id = 1
) tt1
where tt1.user_id is not null

Expand All @@ -109,7 +107,7 @@
-- Tail of a query that sums act_weight per group from keep_zhaoht_user_tfidf_04 for one
-- hard-coded user_id, ordered by the summed weight descending.
-- NOTE(review): the start of this statement is collapsed in the rendered diff, so the
-- first grouped column is not visible here; the "from" line appears in old and new form.
org_id,
org_name,
sum(act_weight) as weight
from gdw_working.keep_zhaoht_user_tfidf_04
from temp.keep_zhaoht_user_tfidf_04
where user_id='45709136'
group by 1,2,3
order by weight desc
Expand Down
5 changes: 5 additions & 0 deletions UserprofileHive2Hbase/.idea/codeStyles/codeStyleConfig.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions UserprofileHive2Hbase/.idea/compiler.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions UserprofileHive2Hbase/.idea/hydra.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions UserprofileHive2Hbase/.idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions UserprofileHive2Hbase/.idea/sbt.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions UserprofileHive2Hbase/.idea/scala_compiler.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

124 changes: 124 additions & 0 deletions UserprofileHive2Hbase/.idea/uiDesigner.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit bf568a0

Please sign in to comment.