add src

jaccopa · Dec 2, 2023 · f2fd2f5 · f2fd2f5
1 parent c413b01
commit f2fd2f5
Show file tree

Hide file tree

Showing 3 changed files with 332 additions and 0 deletions.
diff --git a/maqiong/result.jpeg b/maqiong/result.jpeg
diff --git a/maqiong/src/my_code_simular.ipynb b/maqiong/src/my_code_simular.ipynb
@@ -0,0 +1,332 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 71,
+   "id": "ba5fb3f7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import math\n",
+    "import re\n",
+    "from torch.utils.data import Dataset, DataLoader\n",
+    "import collections\n",
+    "\n",
+    "class CosineSimilarity(nn.Module):\n",
+    "    def forward(self, tensor_1,tensor_2):\n",
+    "        normalized_tensor_1=tensor_1/tensor_1.norm(dim=-1,keepdim=True)\n",
+    "        normalized_tensor_2=tensor_2/tensor_2.norm(dim=-1,keepdim=True)\n",
+    "        return (normalized_tensor_1*normalized_tensor_2).sum(dim=-1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 73,
+   "id": "b8330127",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([-0.0021], grad_fn=<SumBackward1>)\n",
+      "tensor([1.0000], grad_fn=<SumBackward1>)\n"
+     ]
+    }
+   ],
+   "source": [
+    "#for test\n",
+    "input1 =  torch.randn(1,200,requires_grad=True)\n",
+    "input2 =  torch.randn(1,200,requires_grad=True)\n",
+    "con    =  CosineSimilarity()\n",
+    "s1     =  con(input1,input2)\n",
+    "s2     =  torch.cosine_similarity(input1,input1)\n",
+    "print(s1)\n",
+    "print(s2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 74,
+   "id": "0b278674",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# dealwith text\n",
+    "def tokenize(text):\n",
+    "    fileters = ['!', '\"', '#', '$', '%', '&', '\\(', '\\)', '\\*', '\\+', ',', '-', '\\.', '/', ':', ';', '<', '=', '>', '\\?', '@'\n",
+    "        , '\\[', '\\\\', '\\]', '^', '_', '`', '\\{', '\\|', '\\}', '~', '\\t', '\\n', '\\x97', '\\x96', '”', '“', '0', '1', '2',\n",
+    "               '3', '4', '5', '6', '7', '8', '9', '10']\n",
+    "    text = re.sub(\"<.*?>\", \" \", text, flags=re.S)\n",
+    "    text = re.sub(\"|\".join(fileters), \" \", text, flags=re.S)\n",
+    "    return [i.strip() for i in text.split()]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 75,
+   "id": "416c777e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['a', 'dnjsdf']\n"
+     ]
+    }
+   ],
+   "source": [
+    "#test \n",
+    "res = tokenize(\"a3dnjsdf99099_\")\n",
+    "print(res)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 124,
+   "id": "358ce47d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#gen dict\n",
+    "def gendict(x,y):\n",
+    "    z  = x + '!' + y\n",
+    "    t  = tokenize(z)\n",
+    "    li = list(set(t))\n",
+    "    print('diction:')\n",
+    "    print(li)\n",
+    "    tinydict = {}\n",
+    "    it = 1;\n",
+    "    for i in li:\n",
+    "        tinydict[i] = it\n",
+    "        it = it + 1\n",
+    "    for i in li:\n",
+    "        tinydict[i] = tinydict[i]/(len(li)+1)\n",
+    "    print('kv:')\n",
+    "    print(tinydict)\n",
+    "    xx = []\n",
+    "    yy = []\n",
+    "    for ix in tokenize(x):\n",
+    "        xx.append(tinydict[ix])\n",
+    "    for iy in tokenize(y):\n",
+    "        yy.append(tinydict[iy])\n",
+    "    #print(xx)\n",
+    "    #print(yy)\n",
+    "    zz = (xx,yy)\n",
+    "    #print('text to number:')\n",
+    "    #print(zz)\n",
+    "    return (xx,yy)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 125,
+   "id": "d73f236e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "diction:\n",
+      "['a', 'aa', 'dnjsdf']\n",
+      "kv:\n",
+      "{'a': 0.25, 'aa': 0.5, 'dnjsdf': 0.75}\n",
+      "([0.25, 0.75], [0.25, 0.75, 0.5])\n"
+     ]
+    }
+   ],
+   "source": [
+    "#test gendict\n",
+    "z = gendict(\"a3dnjsdf99099_\",\"a3dnjsdf99099aa_\")\n",
+    "print(z)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 126,
+   "id": "03552361",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#f(x,y)\n",
+    "def f(x,y):\n",
+    "    dic_x = tokenize(x)\n",
+    "    dic_x_count = len(dic_x)\n",
+    "    print(dic_x)\n",
+    "    dic_y = tokenize(y)\n",
+    "    dic_y_count = len(dic_y)\n",
+    "    print(dic_y)\n",
+    "    sx = 0.0\n",
+    "    sy = 0.0\n",
+    "    for xx in dic_x:\n",
+    "        if xx in dic_y:\n",
+    "            sx = sx + 1.0/dic_x_count\n",
+    "    for yy in dic_y:\n",
+    "        if yy in dic_x:\n",
+    "            sy = sy + 1.0/dic_y_count\n",
+    "    print(sx)\n",
+    "    print(sy)\n",
+    "    return max(sx,sy)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 127,
+   "id": "4657d7f2",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['dqn', 'n', 'j', 's']\n",
+      "['dqn', 'js']\n",
+      "0.25\n",
+      "0.5\n",
+      "0.5\n"
+     ]
+    }
+   ],
+   "source": [
+    "#test f(x,y)\n",
+    "x = \"3dqn1n1j1s99099_\"\n",
+    "y = \"3dqn1js99099_\"\n",
+    "test_f = f(x,y)\n",
+    "print(test_f)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 128,
+   "id": "bbbd7911",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def embed(x,y):\n",
+    "    dic_x = tokenize(x)\n",
+    "    dic_x_count = len(dic_x)\n",
+    "    dic_y = tokenize(y)\n",
+    "    dic_y_count = len(dic_y)\n",
+    "    emb = torch.nn.Embedding(dic_x_count + dic_y_count,5)\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 132,
+   "id": "0ba94e55",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#g(x,y)\n",
+    "def g(x,y):\n",
+    "    (xx,yy) = gendict(x,y)\n",
+    "    count = min(len(xx),len(yy))\n",
+    "    print(count)\n",
+    "    tx = []\n",
+    "    ty = []\n",
+    "    for i in range(0,count):\n",
+    "        tx.append(xx[i])\n",
+    "        ty.append(yy[i])\n",
+    "    print(tx)\n",
+    "    print(ty)\n",
+    "    return con(torch.tensor(tx),torch.tensor(ty))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 134,
+   "id": "1b17ae28",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "diction:\n",
+      "['j', 'n', 's', 'js', 'dqn']\n",
+      "kv:\n",
+      "{'j': 0.16666666666666666, 'n': 0.3333333333333333, 's': 0.5, 'js': 0.6666666666666666, 'dqn': 0.8333333333333334}\n",
+      "2\n",
+      "[0.8333333333333334, 0.3333333333333333]\n",
+      "[0.8333333333333334, 0.6666666666666666]\n",
+      "tensor(0.9570)\n"
+     ]
+    }
+   ],
+   "source": [
+    "#test g(x,y)\n",
+    "x = \"3dqn1n1j1s99099_\"\n",
+    "y = \"3dqn1js99099_\"\n",
+    "test_g = g(x,y)\n",
+    "print(test_g)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 144,
+   "id": "1db58b72",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['int', 'sum', 'int', 'x', 'int', 'y', 'return', 'x', 'y']\n",
+      "['int', 'summary', 'float', 'z', 'return', 'z']\n",
+      "0.4444444444444444\n",
+      "0.3333333333333333\n",
+      "diction:\n",
+      "['y', 'return', 'x', 'summary', 'sum', 'int', 'z', 'float']\n",
+      "kv:\n",
+      "{'y': 0.1111111111111111, 'return': 0.2222222222222222, 'x': 0.3333333333333333, 'summary': 0.4444444444444444, 'sum': 0.5555555555555556, 'int': 0.6666666666666666, 'z': 0.7777777777777778, 'float': 0.8888888888888888}\n",
+      "6\n",
+      "[0.6666666666666666, 0.5555555555555556, 0.6666666666666666, 0.3333333333333333, 0.6666666666666666, 0.1111111111111111]\n",
+      "[0.6666666666666666, 0.4444444444444444, 0.8888888888888888, 0.7777777777777778, 0.2222222222222222, 0.7777777777777778]\n",
+      "tensor(0.8156)\n"
+     ]
+    }
+   ],
+   "source": [
+    "#result\n",
+    "x = \"int sum(int x,int y){return x - y;}\"\n",
+    "y = \"int summary(float z){return z;}\"\n",
+    "s = max(f(x,y), g(x,y))\n",
+    "print(s)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c8b1a5ce",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/maqiong/src/tst.py b/maqiong/src/tst.py