{"id":751,"date":"2020-12-30T10:22:12","date_gmt":"2020-12-30T01:22:12","guid":{"rendered":"http:\/\/cedartrees.co.kr\/?p=751"},"modified":"2021-04-03T19:08:09","modified_gmt":"2021-04-03T10:08:09","slug":"word2vec-pytorch","status":"publish","type":"post","link":"http:\/\/blog.cedartrees.co.kr\/index.php\/2020\/12\/30\/word2vec-pytorch\/","title":{"rendered":"Word2Vec \uad6c\ud604"},"content":{"rendered":"\n<p>Word2Vec\uc744 pytorch\ub97c \ud1b5\ud574\uc11c \uad6c\ud604\ud574\ubcf4\uaca0\uc2b5\ub2c8\ub2e4. \ud30c\uc774\ud1a0\uce58 \uacf5\uc2dd\ud648\uc5d0\ub3c4 \uc720\uc0ac\ud55c \uc608\uc81c\uac00 \uc788\uc73c\ub2c8 \uad00\uc2ec\uc788\uc73c\uc2e0 \ubd84\ub4e4\uc740 \uacf5\uc2dd\ud648\uc5d0 \uc788\ub294 \ub0b4\uc6a9\uc744 \uc77d\uc5b4\ubcf4\uc2dc\ub294 \uac83\uc774 \ub3c4\uc6c0\uc774 \ub418\uc2dc\ub9ac\ub77c \uc0dd\uac01\ub429\ub2c8\ub2e4.<\/p>\n\n\n\n<p>\uba3c\uc800 \uc544\ub798\uc640 \uac19\uc774 \ud544\uc694\ud55c \ub77c\uc774\ube0c\ub7ec\ub9ac\ub4e4\uc744 \uc784\ud3ec\ud2b8\ud569\ub2c8\ub2e4. \ub9c8\uc9c0\ub9c9\uc5d0 \uc784\ud3ec\ud2b8\ud55c matplotlib\uc758 \uacbd\uc6b0\ub294 \uc2dc\uac01\ud654\ub97c \uc704\ud55c \uac83\uc73c\ub85c \ub2e8\uc5b4\ub4e4\uc774 \uc5b4\ub5a4 \uc0c1\uad00\uc131\uc744 \uac00\uc9c0\ub294\uc9c0 \ud655\uc778\ud574\ubcf4\uae30 \uc704\ud568\uc785\ub2c8\ub2e4.<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">import torch\nimport torch.nn as nn\nimport torch.optim as optim\nimport torch.nn.functional as F\n\nimport numpy as np\nimport pandas as pd<\/pre>\n\n\n\n<p>\uc544\ub798\uc640 \uac19\uc740 \ud14d\uc2a4\ud2b8\ub97c \uc120\uc5b8\ud569\ub2c8\ub2e4. \uba87\uac1c\uc758 \ub2e8\uc5b4\ub85c \uad6c\uc131\ub41c \ubb38\uc7a5\uc774\uace0 \uc911\ubcf5\ub41c \ubb38\uc7a5\ub4e4\uc744 \ubcf5\uc0ac\ud574\uc11c \ubd99\uc5ec \ub123\uc5c8\uc2b5\ub2c8\ub2e4. 
Word2Vec\uc744 \uad6c\ud604\ud558\ub294\ub370 \uc5ec\ub7ec \ubc29\uc2dd\uc774 \uc788\uc9c0\ub9cc \uc774\ubc88 \uc608\uc81c\uc5d0\uc11c\ub294 Skip-Gram \ubc29\uc2dd\uc744 \uc0ac\uc6a9\ud569\ub2c8\ub2e4.<\/p>\n\n\n\n<div class=\"wp-block-image\"><figure class=\"aligncenter size-large\"><img loading=\"lazy\" width=\"414\" height=\"471\" src=\"http:\/\/cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/\u1109\u1173\u110f\u1173\u1105\u1175\u11ab\u1109\u1163\u11ba-2020-12-30-\u110b\u1169\u110c\u1165\u11ab-11.23.21.png\" alt=\"\" class=\"wp-image-761\" srcset=\"http:\/\/blog.cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/\u1109\u1173\u110f\u1173\u1105\u1175\u11ab\u1109\u1163\u11ba-2020-12-30-\u110b\u1169\u110c\u1165\u11ab-11.23.21.png 414w, http:\/\/blog.cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/\u1109\u1173\u110f\u1173\u1105\u1175\u11ab\u1109\u1163\u11ba-2020-12-30-\u110b\u1169\u110c\u1165\u11ab-11.23.21-264x300.png 264w\" sizes=\"(max-width: 414px) 100vw, 414px\" \/><\/figure><\/div>\n\n\n\n<p>\uc704 \uad6c\uc870\uc5d0\uc11c \ud575\uc2ec\uc740 \uac00\uc911\uce58\ud589\ub82c\u00a0W,\u00a0W\u2032\u00a0\ub450 \uac1c\uc785\ub2c8\ub2e4. Word2Vec\uc758 \ud559\uc2b5\uacb0\uacfc\uac00 \uc774 \ub450 \uac1c\uc758 \ud589\ub82c\uc785\ub2c8\ub2e4. \uadf8\ub9bc\uc744 \uc790\uc138\ud788 \ubcf4\uc2dc\uba74 \uc785\ub825\uce35-\uc740\ub2c9\uce35, \uc740\ub2c9\uce35-\ucd9c\ub825\uce35\uc744 \uc787\ub294 \uac00\uc911\uce58 \ud589\ub82c\uc758 \ubaa8\uc591\uc774 \uc11c\ub85c\u00a0<strong>\uc804\uce58(transpose)<\/strong>\ud55c \uac83\uacfc \ub3d9\uc77c\ud55c \uac83\uc744 \ubcfc \uc218 \uc788\uc2b5\ub2c8\ub2e4. \uadf8\ub7f0\ub370 \uc804\uce58\ud558\uba74 \uadf8 \ubaa8\uc591\uc774 \uac19\ub2e4\uace0 \ud574\uc11c \uc644\ubcbd\ud788 \ub3d9\uc77c\ud55c \ud589\ub82c\uc740 \uc544\ub2c8\ub77c\ub294 \uc810\uc5d0 \uc8fc\uc758\ud560 \ud544\uc694\uac00 \uc788\uc2b5\ub2c8\ub2e4. 
\ubb3c\ub860 \ub450 \ud589\ub82c\uc744 \ud558\ub098\uc758 \ud589\ub82c\ub85c \ucde8\uae09(tied)\ud558\ub294 \ubc29\uc2dd\uc73c\ub85c \ud559\uc2b5\uc744 \uc9c4\ud589\ud560 \uc218 \uc788\uace0, \ud559\uc2b5\uc774 \uc544\uc8fc \uc798\ub418\uba74\u00a0W\uc640\u00a0W\u2032\u00a0\uac00\uc6b4\ub370 \uc5b4\ub5a4 \uac78 \ub2e8\uc5b4\ubca1\ud130\ub85c \uc4f0\ub4e0 \uad00\uacc4\uac00 \uc5c6\ub2e4\uace0 \ud569\ub2c8\ub2e4.<\/p>\n\n\n\n<p>\ub610 \ub2e4\ub978 \ubc29\ubc95\uc740 CBOW(Continuous Bag-of-Words) \ubc29\uc2dd\uc774 \uc788\uc2b5\ub2c8\ub2e4. \uc774 \ubc29\uc2dd\uc740 Skip-Gram\uacfc \ubc18\ub300\uc758 \ubc29\uc2dd\uc785\ub2c8\ub2e4.<br>CBOW\ub294 \uc8fc\ubcc0\uc5d0 \uc788\ub294 \ub2e8\uc5b4\ub4e4\uc744 \uac00\uc9c0\uace0, \uc911\uac04\uc5d0 \uc788\ub294 \ub2e8\uc5b4\ub4e4\uc744 \uc608\uce21\ud558\ub294 \ubc29\ubc95\uc785\ub2c8\ub2e4. \ubc18\ub300\ub85c, Skip-Gram\uc740 \uc911\uac04\uc5d0 \uc788\ub294 \ub2e8\uc5b4\ub85c \uc8fc\ubcc0 \ub2e8\uc5b4\ub4e4\uc744 \uc608\uce21\ud558\ub294 \ubc29\ubc95\uc785\ub2c8\ub2e4. \uba54\ucee4\ub2c8\uc998 \uc790\uccb4\ub294 \uac70\uc758 \ub3d9\uc77c\ud558\uae30 \ub54c\ubb38\uc5d0 \uc774\ud574\ud558\ub294\ub370 \uc5b4\ub835\uc9c0\ub294 \uc54a\uc2b5\ub2c8\ub2e4.<\/p>\n\n\n\n<p>\ubcf4\ud1b5 \ub525\ub7ec\ub2dd\uc774\ub77c\ud568\uc740, \uc785\ub825\uce35\uacfc \ucd9c\ub825\uce35 \uc0ac\uc774\uc758 \uc740\ub2c9\uce35\uc758 \uac1c\uc218\uac00 \ucda9\ubd84\ud788 \uc313\uc778 \uc2e0\uacbd\ub9dd\uc744 \ud559\uc2b5\ud560 \ub54c\ub97c \ub9d0\ud558\ub294\ub370 Word2Vec\ub294 \uc785\ub825\uce35\uacfc \ucd9c\ub825\uce35 \uc0ac\uc774\uc5d0 \ud558\ub098\uc758 \uc740\ub2c9\uce35\ub9cc\uc774 \uc874\uc7ac\ud569\ub2c8\ub2e4. \uc774\ub807\uac8c \uc740\ub2c9\uce35(hidden Layer)\uc774 1\uac1c\uc778 \uacbd\uc6b0\uc5d0\ub294 \uc77c\ubc18\uc801\uc73c\ub85c \uc2ec\uce35\uc2e0\uacbd\ub9dd(Deep Neural Network)\uc774 \uc544\ub2c8\ub77c \uc595\uc740\uc2e0\uacbd\ub9dd(Shallow Neural Network)\uc774\ub77c\uace0 \ubd80\ub985\ub2c8\ub2e4. 
\ub610\ud55c Word2Vec\uc758 \uc740\ub2c9\uce35\uc740 \uc77c\ubc18\uc801\uc778 \uc740\ub2c9\uce35\uacfc\ub294 \ub2ec\ub9ac \ud65c\uc131\ud654 \ud568\uc218\uac00 \uc874\uc7ac\ud558\uc9c0 \uc54a\uc73c\uba70 \ub8e9\uc5c5 \ud14c\uc774\ube14\uc774\ub77c\ub294 \uc5f0\uc0b0\uc744 \ub2f4\ub2f9\ud558\ub294 \uce35\uc73c\ub85c \uc77c\ubc18\uc801\uc778 \uc740\ub2c9\uce35\uacfc \uad6c\ubd84\ud558\uae30 \uc704\ud574 \ud22c\uc0ac\uce35(projection layer)\uc774\ub77c\uace0 \ubd80\ub974\uae30\ub3c4 \ud569\ub2c8\ub2e4.<\/p>\n\n\n\n<div class=\"wp-block-image\"><figure class=\"aligncenter size-large\"><img loading=\"lazy\" width=\"700\" height=\"383\" src=\"http:\/\/cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/1_cuOmGT7NevP9oJFJfVpRKA.png\" alt=\"\" class=\"wp-image-762\" srcset=\"http:\/\/blog.cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/1_cuOmGT7NevP9oJFJfVpRKA.png 700w, http:\/\/blog.cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/1_cuOmGT7NevP9oJFJfVpRKA-300x164.png 300w\" sizes=\"(max-width: 700px) 100vw, 700px\" \/><\/figure><\/div>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">corpus = [\n    'he is a king',\n    'she is a queen',\n    'he is a man',\n    'she is a woman',\n    'warsaw is poland capital',\n    'berlin is germany capital',\n    'paris is france capital',\n    'seoul is korea capital', \n    'bejing is china capital',\n    'tokyo is japan capital',\n]\n\ndef tokenize_corpus(corpus):\n    tokens = [x.split() for x in corpus]\n    return tokens\n\ntokenized_corpus = tokenize_corpus(corpus)<\/pre>\n\n\n\n<p>\ub2e8\uc5b4\ub4e4\uc758 \uc911\ubcf5\uc744 \uc81c\uac70\ud558\uc5ec vocabulary \ub9ac\uc2a4\ud2b8\ub97c \ub9cc\ub4e4\uace0 word2idx, idx2word dict\ub97c \ub9cc\ub4ed\ub2c8\ub2e4. 
<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">vocabulary = []\nfor sentence in tokenized_corpus:\n    for token in sentence:\n        if token not in vocabulary:\n            vocabulary.append(token)\n\nword2idx = {w: idx for (idx, w) in enumerate(vocabulary)}\nidx2word = {idx: w for (idx, w) in enumerate(vocabulary)}\n\nvocabulary_size = len(vocabulary)<\/pre>\n\n\n\n<p>Skip-Gram\uc774\ub098 CBOW \ubaa8\ub450 window_size \uac00 \ud544\uc694\ud569\ub2c8\ub2e4. \ud574\ub2f9 \ud30c\ub77c\uba54\ud130\ub294 \uc8fc\ubcc0\uc758 \ub2e8\uc5b4\ub97c \uba87\uac1c\uae4c\uc9c0 \ud559\uc2b5\uc5d0 \uc774\uc6a9\ud560 \uac83\uc778\uac00\ub97c \uacb0\uc815\ud574\uc8fc\ub294 \ud30c\ub77c\uba54\ud130\uc785\ub2c8\ub2e4. \uc774\ubc88 \uc608\uc81c\uc5d0\uc11c\ub294 2\uac1c\uc758 \ub2e8\uc5b4\ub9cc \ud559\uc2b5\uc5d0 \ud65c\uc6a9\ud558\ub3c4\ub85d \ud558\uaca0\uc2b5\ub2c8\ub2e4.<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">window_size = 2\nidx_pairs = []\n\nfor sentence in tokenized_corpus:\n    indices = [word2idx[word] for word in sentence]\n    for center_word_pos in range(len(indices)):\n        for w in range(-window_size, window_size + 1):\n            context_word_pos = center_word_pos + w\n            if context_word_pos &lt; 0 or context_word_pos >= len(indices) or center_word_pos == context_word_pos:\n                continue\n            context_word_idx = indices[context_word_pos]\n            idx_pairs.append((indices[center_word_pos], context_word_idx))\n\nidx_pairs = np.array(idx_pairs) <\/pre>\n\n\n\n<p>\uc704\uc640 \uac19\uc740 \uacfc\uc815\uc744 \ud1b5\ud574\uc11c 
idx_pairs\ub97c \ub9cc\ub4e4 \uc218 \uc788\uc2b5\ub2c8\ub2e4. array\uc5d0\uc11c 10\uac1c\ub9cc  \ucd9c\ub825\ud574\ubcf4\uba74 \uc544\ub798\uc640 \uac19\uc740 \ubc30\uc5f4\uc744 \ubcfc \uc218 \uc788\uc2b5\ub2c8\ub2e4. <\/p>\n\n\n\n<p>\uc774\uac83\uc740 &#8220;he is a king&#8221;\uc774\ub77c\ub294 \ubb38\uc7a5\uc744 \ud559\uc2b5 \ud560 \ub54c\uc5d0 [he, is],[he,a],[is, he],[is,a],[is,king] &#8230; \ud615\ud0dc\uc758 \ud559\uc2b5\ub370\uc774\ud130\uc785\ub2c8\ub2e4. CBOW \ubc29\uc2dd\uc740 \uc8fc\ubcc0\uc758 \ub2e8\uc5b4\ub4e4\uc744 \ud1b5\ud574\uc11c \ubaa9\uc801\ub2e8\uc5b4\ub97c \uc608\uce21\ud558\ub294 \ud615\ud0dc\ub77c\uba74 skip-gram \ubc29\uc2dd\uc740 \ubaa9\uc801\ub2e8\uc5b4\ub97c \ud1b5\ud574\uc11c \uc8fc\ubcc0\uc5d0 \ub098\uc62c \uc218 \uc788\ub294 \ub2e8\uc5b4 [is, a]\ub97c \uc608\uce21\ud558\ub294 \ubc29\ubc95\uc73c\ub85c \ud559\uc2b5\uc774 \uc9c4\ud589\ub429\ub2c8\ub2e4.<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">print(idx_pairs[0:10])\narray([[0, 1],\n       [0, 2],\n       [1, 0],\n       [1, 2],\n       [1, 3],\n       [2, 0],\n       [2, 1],\n       [2, 3],\n       [3, 1],\n       [3, 2]])<\/pre>\n\n\n\n<p>\uc785\ub825 \ub370\uc774\ud130\ub97c One-Hot \ud615\ud0dc\ub85c \ubcc0\uacbd\ud569\ub2c8\ub2e4. \ucc38\uace0\ub85c One-Hot \ud615\ud0dc\ub97c \uc0ac\uc6a9\ud558\uc9c0 \uc54a\uace0 nn.Embedding()\uc744 \ud1b5\ud574\uc11c \ub8e9\uc5c5\ud14c\uc774\ube14(Look-Up Table)\uc744 \ub9cc\ub4e4\uc5b4 \uc0ac\uc6a9\ud574\ub3c4 \ubb34\ubc29\ud569\ub2c8\ub2e4. 
nn.Embedding()\uc744 \uc0ac\uc6a9\ud558\ub294 \ubc95\uc740 \uc774\uc804 \uae00\uc5d0\uc11c \ub2e4\ub918\uae30 \ub54c\ubb38\uc5d0 \uc790\uc138\ud55c \ub0b4\uc6a9\uc740 \ud574\ub2f9 \uac8c\uc2dc\ubb3c\uc744 \ucc38\uc870\ud558\uc2dc\uae30 \ubc14\ub78d\ub2c8\ub2e4.<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">def get_input_layer(word_idx):\n    return np.eye(vocabulary_size)[word_idx]\n\nX = []\ny = []\nfor data, target in idx_pairs:\n    X.append(get_input_layer(data))\n    y.append(target)\n    \nX = torch.FloatTensor(np.array(X))\ny = torch.Tensor(np.array(y)).long()<\/pre>\n\n\n\n<p>\uc774\uc81c \uc2e0\uacbd\ub9dd \ubaa8\ub4c8\uc744 \uc544\ub798\uc640 \uac19\uc774 \uc0dd\uc131\ud569\ub2c8\ub2e4. \uc785\ub825\uacfc \ucd9c\ub825 \uc0ac\uc774\uc5d0 2\ucc28\uc6d0\uc758 \ubca1\ud130\ud615\ud0dc\ub85c \uc815\ubcf4\ub97c \uc555\ucd95\ud558\uac8c\ub429\ub2c8\ub2e4.<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">class Word2VecModel(nn.Module):\n    def __init__(self,inout_dim):\n        super().__init__()\n        self.linear1 = nn.Linear(inout_dim,2)\n        self.linear2 = nn.Linear(2,inout_dim)\n        \n    def forward(self,x):\n        return self.linear2(self.linear1(x))\n    \nmodel = Word2VecModel(X.size(dim=-1))<\/pre>\n\n\n\n<p>\uc544\ub798\uc640 \uac19\uc774 \ub370\uc774\ud130\ub97c \ud6c8\ub828\ud569\ub2c8\ub2e4. 
\uc608\uce21\uce58(prediction)\uc640 \uc2e4\uc81c \uac12(y)\ub97c \ud1b5\ud574\uc11c cost\ub97c \uacc4\uc0b0\ud558\uace0 \uc774\ub97c \ucd9c\ub825\ud574\uc90d\ub2c8\ub2e4.<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\"># optimizer \uc124\uc815\noptimizer = optim.Adam(model.parameters())\n\nnb_epochs = 100\nfor epoch in range(nb_epochs + 1):\n\n    # H(x) \uacc4\uc0b0\n    prediction = model(X)\n\n    # cost \uacc4\uc0b0\n    cost = F.cross_entropy(prediction, y)\n\n    # cost\ub85c H(x) \uac1c\uc120\n    optimizer.zero_grad()\n    cost.backward()\n    optimizer.step()\n    \n    # 100\ubc88\ub9c8\ub2e4 \ub85c\uadf8 \ucd9c\ub825\n    if epoch % 100 == 0:\n        print('Epoch {:4d}\/{} Cost: {:.6f}'.format(\n            epoch, nb_epochs, cost.item()\n        ))<\/pre>\n\n\n\n<p>\ud6c8\ub828\uc774 \uc644\ub8cc\ub41c \ud6c4\uc5d0 \uc0dd\uc131\ub41c weight \uc815\ubcf4\ub97c \ucd9c\ub825\ud574\ubd05\ub2c8\ub2e4.<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">vector = model.state_dict()['linear2.weight'] + model.state_dict()['linear2.bias'].view(-1,1)\nw2v_df = pd.DataFrame(vector.numpy(), columns = ['x1', 'x2'])\nw2v_df['word'] = vocabulary\nw2v_df = w2v_df[['word','x1','x2']]\nw2v_df<\/pre>\n\n\n\n<div class=\"wp-block-image\"><figure class=\"aligncenter size-large\"><img loading=\"lazy\" width=\"265\" height=\"631\" src=\"http:\/\/cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/\u1109\u1173\u110f\u1173\u1105\u1175\u11ab\u1109\u1163\u11ba-2020-12-30-\u110b\u1169\u110c\u1165\u11ab-12.08.48.png\" alt=\"\" class=\"wp-image-752\" 
srcset=\"http:\/\/blog.cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/\u1109\u1173\u110f\u1173\u1105\u1175\u11ab\u1109\u1163\u11ba-2020-12-30-\u110b\u1169\u110c\u1165\u11ab-12.08.48.png 265w, http:\/\/blog.cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/\u1109\u1173\u110f\u1173\u1105\u1175\u11ab\u1109\u1163\u11ba-2020-12-30-\u110b\u1169\u110c\u1165\u11ab-12.08.48-126x300.png 126w\" sizes=\"(max-width: 265px) 100vw, 265px\" \/><\/figure><\/div>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">ano = w2v_df['word'].values\nx1 = w2v_df['x1'].values\nx2 = w2v_df['x2'].values\n\nfig, ax = plt.subplots(figsize=(5,5))\nax.scatter(x1, x2)\n\nfor i, txt in enumerate(ano):\n    ax.annotate(txt, (x1[i], x2[i]))<\/pre>\n\n\n\n<p>2\ucc28\uc6d0 \ubca1\ud130\ub97c \ud1b5\ud574\uc11c \uc544\ub798\uc640 \uac19\uc774 \uc2dc\uac01\ud654\ud574\ubd05\ub2c8\ub2e4. <\/p>\n\n\n\n<div class=\"wp-block-image\"><figure class=\"aligncenter size-large\"><img loading=\"lazy\" width=\"315\" height=\"305\" src=\"http:\/\/cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/download-11.png\" alt=\"\" class=\"wp-image-763\" srcset=\"http:\/\/blog.cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/download-11.png 315w, http:\/\/blog.cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/download-11-300x290.png 300w\" sizes=\"(max-width: 315px) 100vw, 315px\" \/><\/figure><\/div>\n\n\n\n<h2>Reference<\/h2>\n\n\n\n<p>[1]https:\/\/ratsgo.github.io\/from%20frequency%20to%20semantics\/2017\/03\/30\/word2vec\/<br>[2]https:\/\/towardsdatascience.com\/nlp-101-word2vec-skip-gram-and-cbow-93512ee24314<br>[3]https:\/\/wikidocs.net\/22660<\/p>\n\n\n\n<p><\/p>\n","protected":false},"excerpt":{"rendered":"<p>Word2Vec\uc744 pytorch\ub97c \ud1b5\ud574\uc11c \uad6c\ud604\ud574\ubcf4\uaca0\uc2b5\ub2c8\ub2e4. 
\ud30c\uc774\ud1a0\uce58 \uacf5\uc2dd\ud648\uc5d0\ub3c4 \uc720\uc0ac\ud55c \uc608\uc81c\uac00 \uc788\uc73c\ub2c8 \uad00\uc2ec\uc788\uc73c\uc2e0 \ubd84\ub4e4\uc740 \uacf5\uc2dd\ud648\uc5d0 \uc788\ub294 \ub0b4\uc6a9\uc744 \uc77d\uc5b4\ubcf4\uc2dc\ub294 \uac83\uc774 \ub3c4\uc6c0\uc774 \ub418\uc2dc\ub9ac\ub77c \uc0dd\uac01\ub429\ub2c8\ub2e4. \uba3c\uc800 \uc544\ub798\uc640 \uac19\uc774 \ud544\uc694\ud55c \ub77c\uc774\ube0c\ub7ec\ub9ac\ub4e4\uc744 \uc784\ud3ec\ud2b8\ud569\ub2c8\ub2e4. \ub9c8\uc9c0\ub9c9\uc5d0 \uc784\ud3ec\ud2b8\ud55c matplotlib\uc758 \uacbd\uc6b0\ub294 \uc2dc\uac01\ud654\ub97c \uc704\ud55c \uac83\uc73c\ub85c \ub2e8\uc5b4\ub4e4\uc774 \uc5b4\ub5a4 \uc0c1\uad00\uc131\uc744 \uac00\uc9c0\ub294\uc9c0 \ud655\uc778\ud574\ubcf4\uae30 \uc704\ud568\uc785\ub2c8\ub2e4. \uc544\ub798\uc640 \uac19\uc740 \ud14d\uc2a4\ud2b8\ub97c \uc120\uc5b8\ud569\ub2c8\ub2e4. \uba87\uac1c\uc758 \ub2e8\uc5b4\ub85c \uad6c\uc131\ub41c \ubb38\uc7a5\uc774\uace0 \uc911\ubcf5\ub41c \ubb38\uc7a5\ub4e4\uc744 \ubcf5\uc0ac\ud574\uc11c \ubd99\uc5ec \ub123\uc5c8\uc2b5\ub2c8\ub2e4. 
Word2Vec\uc744 \uad6c\ud604\ud558\ub294\ub370 \uc5ec\ub7ec \ubc29\uc2dd\uc774 &hellip; <\/p>\n<p class=\"link-more\"><a href=\"http:\/\/blog.cedartrees.co.kr\/index.php\/2020\/12\/30\/word2vec-pytorch\/\" class=\"more-link\">\ub354 \ubcf4\uae30<span class=\"screen-reader-text\"> &#8220;Word2Vec \uad6c\ud604&#8221;<\/span><\/a><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[24,14],"tags":[100,98,99],"_links":{"self":[{"href":"http:\/\/blog.cedartrees.co.kr\/index.php\/wp-json\/wp\/v2\/posts\/751"}],"collection":[{"href":"http:\/\/blog.cedartrees.co.kr\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/blog.cedartrees.co.kr\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/blog.cedartrees.co.kr\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/blog.cedartrees.co.kr\/index.php\/wp-json\/wp\/v2\/comments?post=751"}],"version-history":[{"count":6,"href":"http:\/\/blog.cedartrees.co.kr\/index.php\/wp-json\/wp\/v2\/posts\/751\/revisions"}],"predecessor-version":[{"id":767,"href":"http:\/\/blog.cedartrees.co.kr\/index.php\/wp-json\/wp\/v2\/posts\/751\/revisions\/767"}],"wp:attachment":[{"href":"http:\/\/blog.cedartrees.co.kr\/index.php\/wp-json\/wp\/v2\/media?parent=751"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/blog.cedartrees.co.kr\/index.php\/wp-json\/wp\/v2\/categories?post=751"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/blog.cedartrees.co.kr\/index.php\/wp-json\/wp\/v2\/tags?post=751"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}