{"id":47,"date":"2020-06-02T14:04:28","date_gmt":"2020-06-02T05:04:28","guid":{"rendered":"http:\/\/cedartrees.co.kr\/?p=47"},"modified":"2021-04-03T19:22:52","modified_gmt":"2021-04-03T10:22:52","slug":"naive-faq-chatbot-1","status":"publish","type":"post","link":"http:\/\/blog.cedartrees.co.kr\/index.php\/2020\/06\/02\/naive-faq-chatbot-1\/","title":{"rendered":"Naive-FAQ-Chatbot-1"},"content":{"rendered":"<h1>\uac04\ub2e8\ud55c FAQ \ucc57\ubd07\uc744 \ub9cc\ub4e4\uc5b4\ubcf4\uaca0\uc2b5\ub2c8\ub2e4.<\/h1>\n<p>\uc774 \ucc57\ubd07\uc740 \uac04\ub2e8\ud55c \ud615\ud0dc\ub85c \ucc57\ubd07\uc744 \ucc98\uc74c \uc811\ud558\uc2dc\ub294 \ubd84\ub4e4\uc744 \uc704\ud574 \uc791\uc131\ud55c \ucf54\ub4dc\uc815\ub3c4\ub85c \uc0dd\uac01\ud558\uc2dc\uba74 \ub420\ub4ef\ud569\ub2c8\ub2e4.<\/p>\n<p>csv \ud30c\uc77c\uc740 \uc9c8\ubb38\uacfc \uadf8 \uc9c8\ubb38\uc774 \uc18d\ud574 \uc788\ub294 \uce74\ud14c\uace0\ub9ac\uc758 \uc9d1\ud569\uc785\ub2c8\ub2e4.<br \/>\n\uc608\ub97c \ub4e4\uc5b4\uc11c \uc9c8\ubb38\uc758 \ub0b4\uc6a9\uc774 &#8220;\uc0ac\uc6a9 \uc911\uc778 \uc544\uc774\ub514 \ub610\ub294 \uc774\ub984\uc744 \ubcc0\uacbd\ud558\uace0 \uc2f6\uc5b4\uc694&#8221; \uc774\ub77c\uba74 \uc774\uac83\uc740 &#8220;\ud68c\uc6d0&#8221; \uce74\ud14c\uace0\ub9ac\uc5d0 \ub4f1\ub85d\ub41c \uc9c8\ubb38\uc774\ub77c\uace0 \uc778\uc2dd\ud558\uc5ec \uadf8 \uc911\uc5d0\uc11c \ud558\ub098\uc758 \ub2f5\ubcc0\uc744 \ucc3e\uc544 \ub9ac\ud134\ud558\ub294 \ubc29\ubc95\uc785\ub2c8\ub2e4.<\/p>\n<p>\ubcf8 \ud14c\uc2a4\ud2b8 \ub370\uc774\ud130 \uc14b\uc5d0\ub294 [&#8220;\ud68c\uc6d0&#8221;,&#8221;\uad50\uc7ac&#8221;,&#8221;\uc6f9\uc0ac\uc774\ud2b8&#8221;&#8230;] \ucd1d 6\uac1c\uc758 \uce74\ud14c\uace0\ub9ac\uac00 \uc788\uc2b5\ub2c8\ub2e4.<\/p>\n<p>\uc989, \uc5b4\ub5a0\ud55c \uc9c8\ubb38\uc744 \uc785\ub825\uc744 \ubc1b\uace0 \uc785\ub825 \ubc1b\uc740 \ub370\uc774\ud130\ub97c \ud1b5\ud574\uc11c \ud574\ub2f9 \uc9c8\ubb38\uc774 \uc5b4\ub5a4 \uce74\ud14c\uace0\ub9ac\uc5d0 \uc18d\ud558\ub294 \uc9c8\ubb38\uc778\uc9c0 
\ucc3e\uc544 \ub0b4\ub294 \ubd84\ub958(Classification)\uc758 \ubb38\uc81c\ub85c \uc811\uadfc\ud558\uba74 \ub429\ub2c8\ub2e4.<\/p>\n<p>\uc77c\ub2e8 \uc0ac\uc6a9\ud560 \ub77c\uc774\ube0c\ub7ec\ub9ac\ub97c import \ud569\ub2c8\ub2e4.<br \/>\n\ucd94\uac00\ud55c \ub77c\uc774\ube0c\ub7ec\ub9ac\ub97c \ubcf4\uc2dc\uba74 \uc544\uc2dc\uaca0\uc9c0\ub9cc pytorch\ub85c \uad6c\ud604\ub418\uc5b4 \uc788\ub294 \ucf54\ub4dc\uc785\ub2c8\ub2e4.<br \/>\n\ub098\uc911\uc5d0 Tensorflow\ub098 keras\ub85c \uc791\uc131\ub41c \ucf54\ub4dc\ub3c4 \uc815\ub9ac\ud574\uc11c \uc62c\ub824\ub4dc\ub9ac\uaca0\uc2b5\ub2c8\ub2e4.<\/p>\n<p>\ucf54\ub4dc\uc758 \uad6c\uc131\uc740 \ub2e8\uc704 \uae30\ub2a5\uc744 \uc218\ud589\ud558\ub294 \uba87\uac1c\uc758 \ud30c\uc77c\ub85c \ubd84\ub9ac\ub418\uc5b4 \uc788\uc2b5\ub2c8\ub2e4.<\/p>\n<ul>\n<li>train.py<\/li>\n<li>trainer.py<\/li>\n<li>model.py<\/li>\n<li>dataloader.py<\/li>\n<li>predict.py<\/li>\n<\/ul>\n<h3>\uc544\ub798\uc758 \ud30c\uc77c\uc740 train.py \uc785\ub2c8\ub2e4.<\/h3>\n<p>\ud574\ub2f9 \ud30c\uc77c\uc758 \uae30\ub2a5\uc740 \ub370\uc774\ud130 \uc900\ube44, \ubaa8\ub378 \uc14b\ud305, \ud6c8\ub828,&nbsp; \ubaa8\ub378 \uc800\uc7a5\uc758 \uc5ed\ud560\uc744 \uc218\ud589\ud569\ub2c8\ub2e4.<\/p>\n<pre class=\"lang:default decode:true\">import argparse\nimport numpy as np\nfrom konlpy.tag import Okt\n\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\n\nfrom model import FaqCategoryClassifier\nfrom dataloader import DataLoader\nfrom trainer import Trainer<\/pre>\n<p>\uc544\ub798 \ubd80\ubd84\uc740 csv\ud30c\uc77c\uc744 \ud1b5\ud574\uc11c \ub370\uc774\ud130\ub97c \uc77d\uc5b4 \uc624\ub294 \ubd80\ubd84\uc785\ub2c8\ub2e4.<br \/>\n\ub370\uc774\ud130\ub97c \uc77d\uc740 \ud6c4\uc5d0 x_train, y_train, labels\ub85c \uc815\ubcf4\ub97c \ub9ac\ud134\ud569\ub2c8\ub2e4.<br \/>\n\ud574\ub2f9 \ud30c\uc77c\uc758 type\uc740 numpy \ud615\ud0dc\ub85c \ub4e4\uc5b4\uc624\uace0 \ud559\uc2b5\uc744 \uc704\ud574&nbsp; \ub370\uc774\ud130 \ud0c0\uc785\uc744 
\ubcc0\ud658\ud574\uc90d\ub2c8\ub2e4.<br \/>\npytorch\ub294 tensorflow\uc640 \ub2ec\ub9ac Define-by-Run \ud615\ud0dc\uc774\uae30 \ub54c\ubb38\uc5d0 \ucf54\ub4dc\ub97c \uc774\ud574\ud558\uae30\uac00 \ud3b8\ub9ac\ud569\ub2c8\ub2e4.<\/p>\n<p>* \uc774\ubbf8\uc9c0\ub97c \ud3ec\ud568\ud55c \uac04\ub2e8\ud55c \uc124\uba85\uc774 \uc788\uc73c\ub2c8 \ucc38\uace0\ud558\uc2dc\uba74 \uc774\ud574\ud558\uc2dc\uae30 \uc88b\uc744\ub4ef\ud569\ub2c8\ub2e4.<br \/>\n<a href=\"https:\/\/medium.com\/@zzemb6\/define-and-run-vs-define-by-run-b527d127e13a\">https:\/\/medium.com\/@zzemb6\/define-and-run-vs-define-by-run-b527d127e13a<\/a><\/p>\n<p>\ud559\uc2b5\uc6a9 \ub370\uc774\ud130\ub294 \uc801\ub2f9\ud788 \uc11e\uc5b4\uc90d\ub2c8\ub2e4. \uc774\ub54c \uac01 feature \ub370\uc774\ud130\uc640 label \ub370\uc774\ud130\uac00 \uc11e\uc774\uc9c0 \uc54a\ub3c4\ub85d \ubc18\ub4dc\uc2dc \uc8fc\uc758\ud574\uc57c \ud569\ub2c8\ub2e4. \uadf8\ub9ac\uace0 \ub9c8\uc9c0\ub9c9\uc5d0 \ud574\ub2f9 \ub370\uc774\ud130\uc758&nbsp; shape\uc744 \ud45c\uc2dc\ud574\ubcf4\uace0 \ub370\uc774\ud130\uac00 \uc798 \ub4e4\uc5b4\uc654\ub294\uc9c0 \ud655\uc778\ud574\ubd05\ub2c8\ub2e4.<\/p>\n<p>\ucc38\uace0\ub85c \uc77d\uc5b4\uc628 \ub370\uc774\ud130\uc5d0\uc11c \ud6c8\ub828\uc6a9 \uc138\ud2b8\uc640 \uac80\uc99d\uc6a9 \uc138\ud2b8\ub97c \ubd84\ub9ac\ud569\ub2c8\ub2e4. 
\ubcf8 \uc2e4\ud5d8\uc5d0\uc11c\ub294 7:3 \uc815\ub3c4\ub85c \ubd84\ub9ac\ud558\uc5ec \uc0ac\uc6a9\ud569\ub2c8\ub2e4.<\/p>\n<pre class=\"lang:default decode:true\"> ## Data Read\n dataloader = DataLoader('.\/data\/faq.categories.extend.csv', okt)\n x_train, y_train, labels = dataloader.prepareDataset()\n\n x_train = torch.FloatTensor(x_train)\n y_train = torch.LongTensor(y_train)\n\n train_cnt = int(x_train.size(0) * 0.7)\n valid_cnt = x_train.size(0) - train_cnt\n\n indices = torch.randperm(x_train.size(0))\n x = torch.index_select(x_train, dim=0, index=indices).split([train_cnt, valid_cnt], dim=0) # x[0] x_train, x[1] x_train valid data\n y = torch.index_select(y_train, dim=0, index=indices).split([train_cnt, valid_cnt], dim=0) # y[0] y_train, y[1] y_train valid label\n\n print('Train', x[0].shape, x[1].shape)\n print('Valid', y[0].shape, y[1].shape)<\/pre>\n<p>\ub370\uc774\ud130\uac00 \uc900\ube44\ub418\uc5c8\uc73c\uba74 \uc774\uc81c \ubaa8\ub378\uc744 \uc0dd\uc131\ud569\ub2c8\ub2e4.<br \/>\n\ubaa8\ub378\uc740 \uc785\ub825(IPT)\uacfc \uc740\ub2c9\uce35(Hidden), \uadf8\ub9ac\uace0 \ucd5c\uc885 \ucd9c\ub825(OPT)\uc758 \ud615\ud0dc\ub85c \ub098\ud0c0\ub0bc \uc218 \uc788\uc2b5\ub2c8\ub2e4. 
\uc774\ub54c \ucd5c\uc885 \ucd9c\ub825\uc740 FAQ\uc758 \uce74\ud14c\uace0\ub9ac \uc989 6\uac1c \uc911 \ud558\ub098\uc778 one-hot\uc758 \ud615\ud0dc\ub85c \ucd9c\ub825\ud569\ub2c8\ub2e4.<br \/>\n\uadf8\ub9ac\uace0 GD \uc54c\uace0\ub9ac\uc998 \uc911\uc5d0 \ud558\ub098\uc778 Adam\uc744 \uc0ac\uc6a9\ud558\uace0 \ubd84\ub958\ubaa8\ub378\uc758 \uc190\uc2e4\ud568\uc218\ub85c cross-entropy\ub97c \uc0ac\uc6a9\ud569\ub2c8\ub2e4.<\/p>\n<pre class=\"lang:default decode:true\">## Model Setting\nIPT = 196\nH = 100\nOPT = len(labels)\nmodel = FaqCategoryClassifier(IPT, H, OPT)\n\noptimizer = optim.Adam(model.parameters())\nloss = nn.CrossEntropyLoss()<\/pre>\n<p>\ubaa8\ub378\uc740 \ub2e4\uc74c\uacfc \uac19\uc774 \uc0dd\uc131\ud569\ub2c8\ub2e4.<br \/>\n\ubaa8\ub378\uc740 nn.Module\uc744 \uc0c1\uc18d \ubc1b\uc544\uc11c \ud074\ub798\uc2a4 \ud30c\uc77c \ud615\ud0dc\ub85c \uc791\uc131\ud569\ub2c8\ub2e4.<br \/>\n\uc704\uc758 \ubaa8\ub378\uc740 \uac04\ub2e8\ud55c \ud615\ud0dc\uc758 \uc815\ubcf4\uc785\ub2c8\ub2e4. \ub354 \ub192\uc740 \ud559\uc2b5 \uacb0\uacfc\ub97c \uc5bb\uae30 \uc704\ud574\uc11c\ub294 \ubaa8\ub378\uc758 \ub808\uc774\uc5b4\ub97c \uc798 \uad6c\uc131\ud574\uc57c \ud569\ub2c8\ub2e4. \ub9e8 \ub9c8\uc9c0\ub9c9\uc5d0 \ucd5c\uc885 \ucd9c\ub825\uc758 shape\uc744 \ub123\uc5b4\uc57c \ud55c\ub2e4\ub294 \uac83\uc744 \uae30\uc5b5\ud558\uc2dc\uae30 \ubc14\ub78d\ub2c8\ub2e4. 
\ub9cc\uc77c \uc774 \uc815\ubcf4\uac00 \ub9de\uc9c0 \uc54a\uc744 \uacbd\uc6b0 \uc5d0\ub7ec \ucf54\ub4dc\ub97c \ud45c\uc2dc\ud569\ub2c8\ub2e4.<\/p>\n<pre class=\"lang:default decode:true\">class FaqCategoryClassifier(nn.Module):\n    def __init__(self, IPT, H, OPT):\n        print('FaqCategoryClassifier Load!')\n        super().__init__()\n\n        self.layers = nn.Sequential(\n            nn.Linear(IPT, H),\n            nn.Linear(H, 50),\n            nn.Linear(50, 20),\n            nn.Linear(20, OPT)\n        )\n        \n\n    def forward(self, x):\n        return self.layers(x)<\/pre>\n<p>\uc774\uc81c\ub294 \uc544\ub798\uc640 \uac19\uc740 \ubc29\ubc95\uc73c\ub85c \ubaa8\ub378\uc744 \ud6c8\ub828\uc2dc\ud0b5\ub2c8\ub2e4.<br \/>\nTrainer \ud074\ub798\uc2a4\ub294 \ub2e4\uc74c \ud3b8\uc5d0\uc11c \ub0b4\uc6a9\uc744 \uc124\uba85\ud574\ub4dc\ub9ac\uaca0\uc2b5\ub2c8\ub2e4.<br \/>\n\uc77c\ub2e8 Trainer\uc5d0\uc11c \ud65c\uc6a9\ud558\ub294 \ub370\uc774\ud130\ub294 \uc785\ub825\uac12\uacfc \uac80\uc99d\uac12 \ub370\uc774\ud130\ub4e4\uacfc \uac01\uac01\uc758 \ub808\uc774\ube14 \uc815\ubcf4\uc785\ub2c8\ub2e4.<\/p>\n<pre class=\"lang:default decode:true\">## Trainer\ntrainer = Trainer(model, optimizer, loss)\ntrainer.train((x[0], y[0]), (x[1], y[1]), config)<\/pre>\n<p>\ud6c8\ub828\uc774 \uc644\ub8cc\ub418\uba74 \ud574\ub2f9 \ubaa8\ub378\uc744 \uc800\uc7a5\ud569\ub2c8\ub2e4.<br \/>\n\uc774\ub54c \uc800\uc7a5\ud560 \ub370\uc774\ud130\ub294 \ubaa8\ub378 \ub370\uc774\ud130 \uc678\uc5d0\ub3c4 \ub2e4\uc591\ud55c \ub370\uc774\ud130\ub97c \ud568\uaed8 \uc800\uc7a5\ud560 \uc218 \uc788\uc2b5\ub2c8\ub2e4.<br \/>\n\uc544\ub798\uc758 \ucf54\ub4dc\ub294 \ud658\uacbd\uc815\ubcf4(config)\uc640 \ub808\uc774\ube14 \uc815\ubcf4\ub97c \uac19\uc774 \uc800\uc7a5\ud569\ub2c8\ub2e4.<br \/>\n\uc774 \uc678\uc5d0\ub3c4 \ud544\uc694\ud55c \uc815\ubcf4\uac00 \uc788\ub2e4\uba74 \uac19\uc774 \uc800\uc7a5\ud569\ub2c8\ub2e4.<\/p>\n<pre class=\"lang:default decode:true\">## Save 
Model\ntorch.save({'model':trainer.model.state_dict(), 'config':config, 'labels':labels}, config.model_fn)<\/pre>\n<p>\uc774\ub807\uac8c train.py \ud30c\uc77c\uc5d0\ub294 \ub370\uc774\ud130 \uc900\ube44-\ubaa8\ub378 \uc14b\ud305-\ud6c8\ub828-\ubaa8\ub378 \uc800\uc7a5\uc758 \ub2e8\uacc4\ub97c \uac70\uce58\uac8c\ub429\ub2c8\ub2e4.<\/p>\n<p>\ub2e4\uc74c \ucf54\ub4dc\uc5d0\uc11c\ub294 trainer.py\uac00 \uc5b4\ub5bb\uac8c \uad6c\uc131\ub418\uc5b4 \uc788\ub294\uc9c0 \ubcf4\uaca0\uc2b5\ub2c8\ub2e4.<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\uac04\ub2e8\ud55c FAQ \ucc57\ubd07\uc744 \ub9cc\ub4e4\uc5b4\ubcf4\uaca0\uc2b5\ub2c8\ub2e4. \uc774 \ucc57\ubd07\uc740 \uac04\ub2e8\ud55c \ud615\ud0dc\ub85c \ucc57\ubd07\uc744 \ucc98\uc74c \uc811\ud558\uc2dc\ub294 \ubd84\ub4e4\uc744 \uc704\ud574 \uc791\uc131\ud55c \ucf54\ub4dc\uc815\ub3c4\ub85c \uc0dd\uac01\ud558\uc2dc\uba74 \ub420\ub4ef\ud569\ub2c8\ub2e4. csv \ud30c\uc77c\uc740 \uc9c8\ubb38\uacfc \uadf8 \uc9c8\ubb38\uc774 \uc18d\ud574 \uc788\ub294 \uce74\ud14c\uace0\ub9ac\uc758 \uc9d1\ud569\uc785\ub2c8\ub2e4. \uc608\ub97c \ub4e4\uc5b4\uc11c \uc9c8\ubb38\uc758 \ub0b4\uc6a9\uc774 &#8220;\uc0ac\uc6a9 \uc911\uc778 \uc544\uc774\ub514 \ub610\ub294 \uc774\ub984\uc744 \ubcc0\uacbd\ud558\uace0 \uc2f6\uc5b4\uc694&#8221; \uc774\ub77c\uba74 \uc774\uac83\uc740 &#8220;\ud68c\uc6d0&#8221; \uce74\ud14c\uace0\ub9ac\uc5d0 \ub4f1\ub85d\ub41c \uc9c8\ubb38\uc774\ub77c\uace0 \uc778\uc2dd\ud558\uc5ec \uadf8 \uc911\uc5d0\uc11c \ud558\ub098\uc758 \ub2f5\ubcc0\uc744 \ucc3e\uc544 \ub9ac\ud134\ud558\ub294 \ubc29\ubc95\uc785\ub2c8\ub2e4. 
\ubcf8 \ud14c\uc2a4\ud2b8 \ub370\uc774\ud130 \uc14b\uc5d0\ub294 &hellip; <\/p>\n<p class=\"link-more\"><a href=\"http:\/\/blog.cedartrees.co.kr\/index.php\/2020\/06\/02\/naive-faq-chatbot-1\/\" class=\"more-link\">\ub354 \ubcf4\uae30<span class=\"screen-reader-text\"> &#8220;Naive-FAQ-Chatbot-1&#8221;<\/span><\/a><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[14,15],"tags":[5,86,61,55,4],"_links":{"self":[{"href":"http:\/\/blog.cedartrees.co.kr\/index.php\/wp-json\/wp\/v2\/posts\/47"}],"collection":[{"href":"http:\/\/blog.cedartrees.co.kr\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/blog.cedartrees.co.kr\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/blog.cedartrees.co.kr\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/blog.cedartrees.co.kr\/index.php\/wp-json\/wp\/v2\/comments?post=47"}],"version-history":[{"count":6,"href":"http:\/\/blog.cedartrees.co.kr\/index.php\/wp-json\/wp\/v2\/posts\/47\/revisions"}],"predecessor-version":[{"id":139,"href":"http:\/\/blog.cedartrees.co.kr\/index.php\/wp-json\/wp\/v2\/posts\/47\/revisions\/139"}],"wp:attachment":[{"href":"http:\/\/blog.cedartrees.co.kr\/index.php\/wp-json\/wp\/v2\/media?parent=47"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/blog.cedartrees.co.kr\/index.php\/wp-json\/wp\/v2\/categories?post=47"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/blog.cedartrees.co.kr\/index.php\/wp-json\/wp\/v2\/tags?post=47"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}