Commit

add src

icepalua committed Dec 2, 2023
1 parent c413b01 commit f2fd2f5
Showing 3 changed files with 332 additions and 0 deletions.
Binary file added maqiong/result.jpeg
332 changes: 332 additions & 0 deletions maqiong/src/my_code_simular.ipynb
@@ -0,0 +1,332 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 71,
"id": "ba5fb3f7",
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"import torch.nn as nn\n",
"import math\n",
"import re\n",
"from torch.utils.data import Dataset, DataLoader\n",
"import collections\n",
"\n",
"class CosineSimilarity(nn.Module):\n",
" def forward(self, tensor_1,tensor_2):\n",
" normalized_tensor_1=tensor_1/tensor_1.norm(dim=-1,keepdim=True)\n",
" normalized_tensor_2=tensor_2/tensor_2.norm(dim=-1,keepdim=True)\n",
" return (normalized_tensor_1*normalized_tensor_2).sum(dim=-1)"
]
},
{
"cell_type": "code",
"execution_count": 73,
"id": "b8330127",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor([-0.0021], grad_fn=<SumBackward1>)\n",
"tensor([1.0000], grad_fn=<SumBackward1>)\n"
]
}
],
"source": [
"#for test\n",
"input1 = torch.randn(1,200,requires_grad=True)\n",
"input2 = torch.randn(1,200,requires_grad=True)\n",
"con = CosineSimilarity()\n",
"s1 = con(input1,input2)\n",
"s2 = torch.cosine_similarity(input1,input1)\n",
"print(s1)\n",
"print(s2)"
]
},
{
"cell_type": "code",
"execution_count": 74,
"id": "0b278674",
"metadata": {},
"outputs": [],
"source": [
"# dealwith text\n",
"def tokenize(text):\n",
" fileters = ['!', '\"', '#', '$', '%', '&', '\\(', '\\)', '\\*', '\\+', ',', '-', '\\.', '/', ':', ';', '<', '=', '>', '\\?', '@'\n",
" , '\\[', '\\\\', '\\]', '^', '_', '`', '\\{', '\\|', '\\}', '~', '\\t', '\\n', '\\x97', '\\x96', '”', '“', '0', '1', '2',\n",
" '3', '4', '5', '6', '7', '8', '9', '10']\n",
" text = re.sub(\"<.*?>\", \" \", text, flags=re.S)\n",
" text = re.sub(\"|\".join(fileters), \" \", text, flags=re.S)\n",
" return [i.strip() for i in text.split()]"
]
},
{
"cell_type": "code",
"execution_count": 75,
"id": "416c777e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['a', 'dnjsdf']\n"
]
}
],
"source": [
"#test \n",
"res = tokenize(\"a3dnjsdf99099_\")\n",
"print(res)"
]
},
{
"cell_type": "code",
"execution_count": 124,
"id": "358ce47d",
"metadata": {},
"outputs": [],
"source": [
"#gen dict\n",
"def gendict(x,y):\n",
" z = x + '!' + y\n",
" t = tokenize(z)\n",
" li = list(set(t))\n",
" print('diction:')\n",
" print(li)\n",
" tinydict = {}\n",
" it = 1;\n",
" for i in li:\n",
" tinydict[i] = it\n",
" it = it + 1\n",
" for i in li:\n",
" tinydict[i] = tinydict[i]/(len(li)+1)\n",
" print('kv:')\n",
" print(tinydict)\n",
" xx = []\n",
" yy = []\n",
" for ix in tokenize(x):\n",
" xx.append(tinydict[ix])\n",
" for iy in tokenize(y):\n",
" yy.append(tinydict[iy])\n",
" #print(xx)\n",
" #print(yy)\n",
" zz = (xx,yy)\n",
" #print('text to number:')\n",
" #print(zz)\n",
" return (xx,yy)\n"
]
},
{
"cell_type": "code",
"execution_count": 125,
"id": "d73f236e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"diction:\n",
"['a', 'aa', 'dnjsdf']\n",
"kv:\n",
"{'a': 0.25, 'aa': 0.5, 'dnjsdf': 0.75}\n",
"([0.25, 0.75], [0.25, 0.75, 0.5])\n"
]
}
],
"source": [
"#test gendict\n",
"z = gendict(\"a3dnjsdf99099_\",\"a3dnjsdf99099aa_\")\n",
"print(z)"
]
},
{
"cell_type": "code",
"execution_count": 126,
"id": "03552361",
"metadata": {},
"outputs": [],
"source": [
"#f(x,y)\n",
"def f(x,y):\n",
" dic_x = tokenize(x)\n",
" dic_x_count = len(dic_x)\n",
" print(dic_x)\n",
" dic_y = tokenize(y)\n",
" dic_y_count = len(dic_y)\n",
" print(dic_y)\n",
" sx = 0.0\n",
" sy = 0.0\n",
" for xx in dic_x:\n",
" if xx in dic_y:\n",
" sx = sx + 1.0/dic_x_count\n",
" for yy in dic_y:\n",
" if yy in dic_x:\n",
" sy = sy + 1.0/dic_y_count\n",
" print(sx)\n",
" print(sy)\n",
" return max(sx,sy)"
]
},
{
"cell_type": "code",
"execution_count": 127,
"id": "4657d7f2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['dqn', 'n', 'j', 's']\n",
"['dqn', 'js']\n",
"0.25\n",
"0.5\n",
"0.5\n"
]
}
],
"source": [
"#test f(x,y)\n",
"x = \"3dqn1n1j1s99099_\"\n",
"y = \"3dqn1js99099_\"\n",
"test_f = f(x,y)\n",
"print(test_f)"
]
},
{
"cell_type": "code",
"execution_count": 128,
"id": "bbbd7911",
"metadata": {},
"outputs": [],
"source": [
"def embed(x,y):\n",
" dic_x = tokenize(x)\n",
" dic_x_count = len(dic_x)\n",
" dic_y = tokenize(y)\n",
" dic_y_count = len(dic_y)\n",
" emb = torch.nn.Embedding(dic_x_count + dic_y_count,5)\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 132,
"id": "0ba94e55",
"metadata": {},
"outputs": [],
"source": [
"#g(x,y)\n",
"def g(x,y):\n",
" (xx,yy) = gendict(x,y)\n",
" count = min(len(xx),len(yy))\n",
" print(count)\n",
" tx = []\n",
" ty = []\n",
" for i in range(0,count):\n",
" tx.append(xx[i])\n",
" ty.append(yy[i])\n",
" print(tx)\n",
" print(ty)\n",
" return con(torch.tensor(tx),torch.tensor(ty))"
]
},
{
"cell_type": "code",
"execution_count": 134,
"id": "1b17ae28",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"diction:\n",
"['j', 'n', 's', 'js', 'dqn']\n",
"kv:\n",
"{'j': 0.16666666666666666, 'n': 0.3333333333333333, 's': 0.5, 'js': 0.6666666666666666, 'dqn': 0.8333333333333334}\n",
"2\n",
"[0.8333333333333334, 0.3333333333333333]\n",
"[0.8333333333333334, 0.6666666666666666]\n",
"tensor(0.9570)\n"
]
}
],
"source": [
"#test g(x,y)\n",
"x = \"3dqn1n1j1s99099_\"\n",
"y = \"3dqn1js99099_\"\n",
"test_g = g(x,y)\n",
"print(test_g)"
]
},
{
"cell_type": "code",
"execution_count": 144,
"id": "1db58b72",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['int', 'sum', 'int', 'x', 'int', 'y', 'return', 'x', 'y']\n",
"['int', 'summary', 'float', 'z', 'return', 'z']\n",
"0.4444444444444444\n",
"0.3333333333333333\n",
"diction:\n",
"['y', 'return', 'x', 'summary', 'sum', 'int', 'z', 'float']\n",
"kv:\n",
"{'y': 0.1111111111111111, 'return': 0.2222222222222222, 'x': 0.3333333333333333, 'summary': 0.4444444444444444, 'sum': 0.5555555555555556, 'int': 0.6666666666666666, 'z': 0.7777777777777778, 'float': 0.8888888888888888}\n",
"6\n",
"[0.6666666666666666, 0.5555555555555556, 0.6666666666666666, 0.3333333333333333, 0.6666666666666666, 0.1111111111111111]\n",
"[0.6666666666666666, 0.4444444444444444, 0.8888888888888888, 0.7777777777777778, 0.2222222222222222, 0.7777777777777778]\n",
"tensor(0.8156)\n"
]
}
],
"source": [
"#result\n",
"x = \"int sum(int x,int y){return x - y;}\"\n",
"y = \"int summary(float z){return z;}\"\n",
"s = max(f(x,y), g(x,y))\n",
"print(s)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c8b1a5ce",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Empty file removed maqiong/src/tst.py
Empty file.
