{"id":720,"date":"2020-12-22T19:05:53","date_gmt":"2020-12-22T10:05:53","guid":{"rendered":"http:\/\/cedartrees.co.kr\/?p=720"},"modified":"2021-04-03T19:08:46","modified_gmt":"2021-04-03T10:08:46","slug":"k-means-cluster","status":"publish","type":"post","link":"http:\/\/blog.cedartrees.co.kr\/index.php\/2020\/12\/22\/k-means-cluster\/","title":{"rendered":"K-Means \ud074\ub7ec\uc2a4\ud130"},"content":{"rendered":"\n<p>\ud559\uc2b5\uc758 \uc885\ub958\uc5d0\ub294 \ub808\uc774\ube14 \ub370\uc774\ud130\uc758 \ud3ec\ud568 \uc5ec\ubd80\uc5d0 \ub530\ub77c\uc11c \uc9c0\ub3c4\ud559\uc2b5(Supervised Learning)\uacfc \ube44\uc9c0\ub3c4\ud559\uc2b5(Unsupervised Learning)\uc73c\ub85c \ub098\ub20c \uc218 \uc788\uc2b5\ub2c8\ub2e4. \uc9c0\ub3c4\ud559\uc2b5 \uc54c\uace0\ub9ac\uc998\uc740 \uc885\uc18d\ubcc0\uc218\uc758 \ud2b9\uc9d5\uc5d0 \ub530\ub77c\uc11c \ubd84\ub958(Classification)\uc640 \ud68c\uadc0(Regression)\ub85c \ub098\ub20c \uc218 \uc788\uc2b5\ub2c8\ub2e4.<br>\ubc18\uba74 \ube44\uc9c0\ub3c4 \ud559\uc2b5\uc740 \ub808\uc774\ube14 \ub370\uc774\ud130\uac00 \uc5c6\ub294 \ud615\ud0dc\uc758 \ub370\uc774\ud130\ub85c \ud074\ub7ec\uc2a4\ud130\ub9c1(Clustering)\uc774 \uac00\uc7a5 \ub300\ud45c\uc801\uc778 \uae30\ubc95\uc785\ub2c8\ub2e4. \ud559\uc2b5 \ub370\uc774\ud130\uc758 Feature\ub97c \ud30c\uc545\ud574\uc11c \ud2b9\uc9d5\uc744 \ucd94\ucd9c\ud558\uace0 \uc774\ub97c \ud1b5\ud574\uc11c \uacf5\ud1b5\uc810\uc774 \uc788\ub294 \ub370\uc774\ud130\ub97c \ubb36\uc5b4\uc8fc\ub294 \ud615\ud0dc\uc758 \ubd84\uc11d\uae30\ubc95\uc785\ub2c8\ub2e4.<\/p>\n\n\n\n<p>\uc608\ub97c \ub4e4\uc5b4\uc11c \ud074\ub7ec\uc2a4\ud130\ub9c1 \uae30\ubc95\uc744 \ud65c\uc6a9\ud574\uc11c \uace0\uac1d\uc758 \uad6c\ub9e4\ud2b9\uc9d5\uc5d0 \ub530\ub77c\uc11c \uace0\uac1d\uad70\uc744 \ubb36\uc5b4 \uc904 \uc218 \uc788\uc2b5\ub2c8\ub2e4. \uadf8\ub807\uac8c \ub41c\ub2e4\uba74 \uace0\uac1d\uad70\uc744 \ub300\uc0c1\uc73c\ub85c\ud558\ub294 \ub9de\ucda4\ud615 \ub9c8\ucf00\ud305\ub3c4 \uac00\ub2a5\ud569\ub2c8\ub2e4. 
\uc774\ubc16\uc5d0\ub3c4 \ub124\ud2b8\uc6cc\ud06c \uc720\ud574 \ud2b8\ub798\ud53d \ud0d0\uc9c0, \uc601\ud654\ub098 TV \uc7a5\uba74 \ubd84\ub958, \uad8c\uc5ed \uc124\uc815, \ub274\uc2a4\ub098 \ud1a0\ud53d \ud074\ub7ec\uc2a4\ud130\ub9c1 \ub4f1 \ub2e4\uc591\ud55c \ubd84\uc57c\uc5d0 \ud65c\uc6a9\ub429\ub2c8\ub2e4. \uadf8\ub9ac\uace0 \uc774\ub7ec\ud55c \uc54c\uace0\ub9ac\uc998\uc740 \ub2e8\ub3c5\uc73c\ub85c \uc0ac\uc6a9\ub418\uae30\ub3c4 \ud558\uc9c0\ub9cc \ub610 \uc5ec\ub7ec \ub2e4\ub978 \uc54c\uace0\ub9ac\uc998\uacfc \uacb0\ud569\ub418\uc5b4 \uc0ac\uc6a9\ub418\uae30\ub3c4 \ud569\ub2c8\ub2e4.<\/p>\n\n\n\n<p>\uc774\uc911\uc5d0\uc11c K-Means \uc54c\uace0\ub9ac\uc998\uc740 \uac00\uc7a5 \uc720\uba85\ud55c \ud074\ub7ec\uc2a4\ud130\ub9c1 \uc54c\uace0\ub9ac\uc998\uc785\ub2c8\ub2e4. \u201cK\u201d\ub294 \ubd84\uc11d \ub300\uc0c1 \ub370\uc774\ud130\ub97c \ub098\ub20c \ud074\ub7ec\uc2a4\ud130\uc758 \uc218\ub97c \uc758\ubbf8\ud569\ub2c8\ub2e4. \uadf8\ub9ac\uace0 Means\ub294 \uac01 \ud074\ub7ec\uc2a4\ud130\uc5d0 \uc18d\ud55c \ub370\uc774\ud130\ub4e4\uc758 \ud3c9\uade0, \uc989 \ud074\ub7ec\uc2a4\ud130\uc758 \uc911\uc2ec\uc744 \uc758\ubbf8\ud569\ub2c8\ub2e4. 
\uc885\ud569\ud574\ubcf4\uba74 \uac01 \ud074\ub7ec\uc2a4\ud130\uc758 \uc911\uc2ec(Centroid)\uc744 \uae30\uc900\uc73c\ub85c \uc8fc\ubcc0\uc5d0 \uc788\ub294 \uac00\uae4c\uc6b4 \ub370\uc774\ud130\ub4e4\uc744 \ud558\ub098\ub85c \ubb36\uc5b4\uc8fc\ub294 \uacfc\uc815\uc774\ub77c\uace0 \ud560 \uc218 \uc788\uc2b5\ub2c8\ub2e4.<\/p>\n\n\n\n<p>K-Means \uc54c\uace0\ub9ac\uc998\uc740 \uc544\ub798\uc640 \uac19\uc740 \uacfc\uc815\uc73c\ub85c \uc218\ud589\ub429\ub2c8\ub2e4.<\/p>\n\n\n\n<ol><li>Centroid \uc124\uc815<\/li><li>\uac01 \ub370\uc774\ud130\ub4e4\uc744 \uac00\uae4c\uc6b4 Centroid\uc5d0 \uc18d\ud55c \uadf8\ub8f9\uc5d0 \ud560\ub2f9<\/li><li>2\ubc88 \uacfc\uc815\uc744 \ud1b5\ud574\uc11c \ub9cc\ub4e4\uc5b4\uc9c4 \ud074\ub7ec\uc2a4\ud130\uc758 Centroid\ub97c \uc0c8\ub86d\uac8c \uc9c0\uc815<\/li><li>2\ubc88,3\ubc88\uc758 \uacfc\uc815\uc744 Centroid\uac00 \ubcc0\ud558\uc9c0 \uc54a\uc744\ub54c\uae4c\uc9c0 \ubc18\ubcf5 \uc218\ud589<\/li><\/ol>\n\n\n\n<p>\uc544\ub798\uc758 \uc608\uc81c\ub97c \ud1b5\ud574\uc11c \uae30\ubcf8\uc801\uc778 \ucee8\uc149\uc744 \uc54c\uc544\ubcf4\uaca0\uc2b5\ub2c8\ub2e4. \uba3c\uc800 \ud544\uc694\ud55c \ub77c\uc774\ube0c\ub7ec\ub9ac\ub4e4\uc744 \uc784\ud3ec\ud2b8\ud569\ub2c8\ub2e4.<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">from sklearn.datasets import make_blobs\nimport pandas as pd\nimport numpy as np\nimport math\nimport scipy as sp\nimport matplotlib.pyplot as plt<\/pre>\n\n\n\n<p>\ud14c\uc2a4\ud2b8\uc6a9 \ub370\uc774\ud130\ub97c \ub9cc\ub4e4\uae30 \uc704\ud574\uc11c sklearn\uc758 make_blobs() \ud568\uc218\ub97c \uc0ac\uc6a9\ud569\ub2c8\ub2e4. \uc0d8\ud50c \ub370\uc774\ud130\ub294 [300 \u00d7 2] \ud589\ub82c\uc785\ub2c8\ub2e4. \ub370\uc774\ud130\ub294 4\uac1c\uc758 \uad70\uc9d1\uc744 \uac00\uc9c0\uace0 \uc788\uc2b5\ub2c8\ub2e4. 
<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">x, y = make_blobs(n_samples=300, centers=4, n_features=2)\ndf = pd.DataFrame(x, y, columns=['x','y']).reset_index(drop=True)<\/pre>\n\n\n\n<p>\uc0dd\uc131\ud55c \ub370\uc774\ud130\uc5d0\uc11c 4\uac1c\uc758 centroid \uac12\uc744 \uc784\uc758\ub85c \ucd94\ucd9c\ud574\ubd05\ub2c8\ub2e4. \uc0d8\ud50c \ub370\uc774\ud130\uc758 \uadf8\ub8f9\uacfc \ucd94\ucd9c\ud55c \uac12\uc744 \ubd89\uc740\uc0c9 \uc810\uc73c\ub85c \ud45c\uc2dc\ud574\ubcf4\uaca0\uc2b5\ub2c8\ub2e4. \uadf8\ub9bc1\uc744 \ubcf4\ub2c8 4\uac1c\uc758 \uad70\uc9d1\uc744 \uc774\ub8e8\ub294 \ub370\uc774\ud130\ub97c \ud655\uc778\ud588\uc2b5\ub2c8\ub2e4. \ub610 4\uac1c\uc758 \uc784\uc758\uc758 \uc810\uc744 \ud45c\uc2dc\ud55c \ubd80\ubd84\uc744 \ubcf4\ub2c8 \uac01 \uad70\uc9d1\uc758 \uc911\uc559\uac12\uacfc\ub294 \uc0c1\ub2f9\ud788 \uac70\ub9ac\uac00 \uba40\uc5b4\ubcf4\uc785\ub2c8\ub2e4. 
\uc774\uc81c \uc774\ub7ec\ud55c \ud559\uc2b5\ub370\uc774\ud130\ub97c \ud1b5\ud574\uc11c Clustering\uc744 \ud574\ubcf4\uaca0\uc2b5\ub2c8\ub2e4.<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">centroids = df.sample(4)\nplt.scatter(x[:,0], x[:,1])\nplt.scatter(centroids['x'], centroids['y'], c='r')<\/pre>\n\n\n\n<div class=\"wp-block-image\"><figure class=\"aligncenter size-large\"><img loading=\"lazy\" width=\"386\" height=\"248\" src=\"http:\/\/cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/download-1-2.png\" alt=\"\" class=\"wp-image-722\" srcset=\"http:\/\/blog.cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/download-1-2.png 386w, http:\/\/blog.cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/download-1-2-300x193.png 300w\" sizes=\"(max-width: 386px) 100vw, 386px\" \/><figcaption>\uadf8\ub9bc1. \uc784\uc758\uc758 Centroid \uc124\uc815<\/figcaption><\/figure><\/div>\n\n\n\n<p>\uc544\ub798\uc758 \ud568\uc218\ub294 \uac01 \ud14c\uc2a4\ud2b8 \ub370\uc774\ud130\uc640 4\uac1c\uc758 \uc810\uc758 \uac70\ub9ac\ub97c \uacc4\uc0b0\ud558\ub294 \ud568\uc218\uc785\ub2c8\ub2e4. \uc544\ub798\uc758 \ud568\uc218\ub97c \uc2e4\ud589\ud558\uba74 [300 \u00d7 4]\uc758 \ud589\ub82c\uc774 \ub098\uc624\uac8c\ub429\ub2c8\ub2e4. \uadf8 \uc774\uc720\ub294 4\uac1c\uc758 \uc911\uc559\uac12\uacfc \uc0d8\ud50c \ub370\uc774\ud130\uc758 \uac70\ub9ac\ub97c \uce21\uc815\ud558\uae30 \ub54c\ubb38\uc785\ub2c8\ub2e4. \uc774\ub807\uac8c \uce21\uc815\ud55c \uac12\uc5d0\uc11c np.argmin() \ud568\uc218\ub97c \uc2e4\ud589\ud558\uba74 4\uac1c\uc758 \uc810 \uc911\uc5d0\uc11c \uac00\uc7a5 \uac00\uae4c\uc6b4 \uc810\uc758 \uac12\uc744 \ub9ac\ud134\ud558\uac8c\ub429\ub2c8\ub2e4. \uadf8 \ub370\uc774\ud130\ub97c cluster_num\uc5d0 \uc785\ub825\ud569\ub2c8\ub2e4. 
<\/p>\n\n\n\n<p>\uadf8\ub9ac\uace0 result\ub77c\ub294 \ub370\uc774\ud130\uc14b\uc744 \ub9ac\ud134\ud558\uac8c\ub429\ub2c8\ub2e4. \uadf8\ub7ec\ub2c8\uae4c result \ub370\uc774\ud130\uc14b\uc740 pandas dataframe\uc758 \uc790\ub8cc\ud615\ud0dc\ub97c \uac00\uc9c0\uace0 \uc788\uace0 \uceec\ub7fc\uc740 \uae30\uc874\uc758 x, y\uc678\uc5d0 0,1,2,3 \uc911\uc5d0 \ud558\ub098\uc758 \uac12\uc744 \ub2f4\uace0 \uc788\ub294 cluster\ub77c\ub294 \uc0c8\ub85c\uc6b4 \uceec\ub7fc\uc744 \ud3ec\ud568\ud558\uace0 \uc788\uc2b5\ub2c8\ub2e4. <\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">def get_distance(center_df):\n    # \uac01 \ub370\uc774\ud130\uc5d0 \ub300\ud558\uc5ec, \uac01 \uc911\uc2ec\uc810\uacfc\uc758 \uc720\ud074\ub9ac\ub4dc \uac70\ub9ac \uacc4\uc0b0\n    distance = sp.spatial.distance.cdist(df, center_df, \"euclidean\")\n    cluster_num = np.argmin(distance, axis=1)\n    result = df.copy()\n    result[\"cluster\"] = np.array(cluster_num)\n    return result<\/pre>\n\n\n\n<p>\uc774 dataframe\uc5d0\uc11c cluster\ub85c groupby\ud55c \ud6c4\uc5d0 \ud3c9\uade0\uac12\uc744 \uacc4\uc0b0\ud558\uba74 \uac01 \uadf8\ub8f9\uc758 x, y\uac12 \uc88c\ud45c\ub97c \ub9ac\ud134\ud558\uac8c\ub429\ub2c8\ub2e4. 
\uadf8 \uac12\uc744 \uc544\ub798\uc758 scatter \uadf8\ub798\ud504\ub85c \ud45c\uc2dc\ud574\ubcf4\uba74 \uadf8\ub9bc2\uc640 \uac19\uc740 \ud615\ud0dc\uac00 \ud45c\uc2dc\ub429\ub2c8\ub2e4.<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\"># cluster\ubcc4\ub85c \ubb36\uc5b4\uc11c \ud3c9\uade0 \uacc4\uc0b0\nc = r.groupby(\"cluster\").mean()\nr = get_distance(c)\nplt.scatter(r['x'], r['y'], c=r['cluster'])<\/pre>\n\n\n\n<p>\uadf8\ub9bc2 \uadf8\ub798\ud504\ub294 \ud55c\ub208\uc5d0 \ubd10\ub3c4 clustering\uc774 \uc548\ub3fc\ubcf4\uc785\ub2c8\ub2e4. \uc774\uc81c 4\uac00\uc9c0 \uc0c9\uc758 \uad70\uc9d1\uc758 \ud3c9\uade0\uac12\uc744 \uad6c\ud55c \ud6c4\uc5d0 centroid\ub97c \uc62e\uaca8\uc8fc\uace0 \ub2e4\uc2dc \uac70\ub9ac\ub97c \uacc4\uc0b0\ud574\ubd05\ub2c8\ub2e4. \uc774\ub7f0 \uacfc\uc815\uc744 \uacc4\uc18d\ud574\ubcf4\uba74 \uadf8\ub9bc3, \uadf8\ub9bc4\uc640 \uac19\uc774 \uc810\uc810 cluster\ubcc4\ub85c \uac01\uae30 \ub2e4\ub978 \uc0c9\uc73c\ub85c \ubd84\ub958\ub418\ub294 \uac83\uc744 \ud655\uc778\ud560 \uc218 \uc788\uc2b5\ub2c8\ub2e4.<\/p>\n\n\n\n<div class=\"wp-block-image\"><figure class=\"aligncenter size-large\"><img loading=\"lazy\" width=\"386\" height=\"248\" src=\"http:\/\/cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/download-2-3.png\" alt=\"\" class=\"wp-image-723\" srcset=\"http:\/\/blog.cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/download-2-3.png 386w, http:\/\/blog.cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/download-2-3-300x193.png 300w\" sizes=\"(max-width: 386px) 100vw, 386px\" \/><figcaption>\uadf8\ub9bc2. 
\uccab\ubc88\uc9f8 \uc218\ud589<\/figcaption><\/figure><\/div>\n\n\n\n<div class=\"wp-block-image\"><figure class=\"aligncenter size-large\"><img loading=\"lazy\" width=\"386\" height=\"248\" src=\"http:\/\/cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/download-8.png\" alt=\"\" class=\"wp-image-724\" srcset=\"http:\/\/blog.cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/download-8.png 386w, http:\/\/blog.cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/download-8-300x193.png 300w\" sizes=\"(max-width: 386px) 100vw, 386px\" \/><figcaption>\uadf8\ub9bc3. \ub450\ubc88\uc9f8 \uc218\ud589<\/figcaption><\/figure><\/div>\n\n\n\n<div class=\"wp-block-image\"><figure class=\"aligncenter size-large\"><img loading=\"lazy\" width=\"386\" height=\"248\" src=\"http:\/\/cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/download-1-3.png\" alt=\"\" class=\"wp-image-725\" srcset=\"http:\/\/blog.cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/download-1-3.png 386w, http:\/\/blog.cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/download-1-3-300x193.png 300w\" sizes=\"(max-width: 386px) 100vw, 386px\" \/><figcaption>\uadf8\ub9bc4. 
3\ubc88\uc9f8 \uc218\ud589<\/figcaption><\/figure><\/div>\n\n\n\n<p>\uc989, centroid \uac12\uc744 \uc784\uc758\ub85c \uc815\ud574\uc8fc\uace0 \uadf8 \ud3ec\uc778\ud2b8\ub97c \uc911\uc2ec\uc73c\ub85c clustering\uc744 \uc218\ud589\ud55c \ud6c4\uc5d0 clustering\ud55c \uac12\uc744 \uc911\uc2ec\uc73c\ub85c \ub2e4\uc2dc centroid\ub97c \uc815\ud558\uace0 \ub2e4\uc2dc clustering\uc744 \uc218\ud589\ud558\ub294 \uc791\uc5c5\uc744 \ub354\uc774\uc0c1\uc758 centroid \uac12\uc774 \ubcc0\ud654\uac00 \uc5c6\uc744 \ub54c\uae4c\uc9c0 \uc218\ud589\ud558\uba74 \uad70\uc9d1\uc774 \ud615\uc131\ub418\ub294 \uac83\uc774 \ubc14\ub85c \uad70\uc9d1\ubd84\uc11d\uc758 \uae30\ubcf8 \uc54c\uace0\ub9ac\uc998\uc785\ub2c8\ub2e4.<\/p>\n\n\n\n<h4>sklearn K-Means \uc0ac\uc6a9<\/h4>\n\n\n\n<p>sklearn\uc5d0\ub294 \ub2e4\ub978 \uba38\uc2e0\ub7ec\ub2dd \uc54c\uace0\ub9ac\uc998\uacfc \ub9c8\ucc2c\uac00\uc9c0\ub85c \ube44\uc9c0\ub3c4\ud559\uc2b5\uc744 \uc704\ud55c \ud074\ub7ec\uc2a4\ud130\ub9c1 \uc54c\uace0\ub9ac\uc998\uc778 K-Means \uc54c\uace0\ub9ac\uc998\uc744 \ud328\ud0a4\uc9c0 \ud615\ud0dc\ub85c \uc81c\uacf5\ud558\uace0 \uc788\uc2b5\ub2c8\ub2e4. sklearn\uc744 \uc0ac\uc6a9\ud558\uba74 \ubc29\uae08 \uc704\uc5d0\uc11c \ud588\ub358\uac83\uacfc \uac19\uc740 \ubcf5\uc7a1\ud55c \uc791\uc5c5\uc744 \ub300\uc2e0\ud574\uc8fc\uae30 \ub54c\ubb38\uc5d0 \ud3b8\ub9ac\ud558\uac8c \ub370\uc774\ud130 \ubd84\uc11d\uc744 \ud560 \uc218 \uc788\uc2b5\ub2c8\ub2e4.<\/p>\n\n\n\n<p>\ud14c\uc2a4\ud2b8 \ub370\uc774\ud130\ub85c \uadf8\ub3d9\uc548 \uc0ac\uc6a9\ud588\ub358 fitness.csv \ub370\uc774\ud130\ub97c \ud65c\uc6a9\ud574\uc11c \ud14c\uc2a4\ud2b8\ud574\ubcf4\uaca0\uc2b5\ub2c8\ub2e4. \ub370\uc774\ud130\uc758 \uc218\uac00 \ub9ce\uc9c0 \uc54a\uc544\uc11c \ubd84\ub958 \uacb0\uacfc\uac00 \uc544\uc27d\uac8c\ub3c4 \uc88b\uc9c0 \uc54a\uc9c0\ub9cc \uadf8\ub798\ub3c4 \uc218\ud589\ud558\ub294 \ubc29\ubc95\uc5d0 \ub300\ud574\uc11c \uac00\uc774\ub4dc\uac00 \ub420 \uc218 \uc788\uc744\ub4ef\ud569\ub2c8\ub2e4. 
\ud14c\uc2a4\ud2b8\ub294 \uba3c\uc800 \ud30c\uc77c\uc744 \uc77d\uc740 \ub2e4\uc74c \ud574\ub2f9 \ub370\uc774\ud130\uc14b\uc740 \ub808\uc774\ube14\uc774 \uc5c6\uae30 \ub54c\ubb38\uc5d0 \ud14c\uc2a4\ud2b8 \uc0bc\uc544 \uc784\uc758\ub85c \ub808\uc774\ube14\uc744 \ub9cc\ub4e4\uc5b4\ubcf4\uace0 \ud074\ub7ec\uc2a4\ud130\uac00 \uc5b4\ub5bb\uac8c \uc608\uce21\ud588\ub294\uc9c0 \ube44\uad50\ud574\ubcf4\uaca0\uc2b5\ub2c8\ub2e4. \ub2e4\uc2dc \ub9d0\uc500\ub4dc\ub9ac\uc9c0\ub9cc \uc815\ud655\uc131\uc744 \uc704\ud574\uc11c \uc218\ud589\ud558\ub294 \ubd80\ubd84\uc740 \uc544\ub2d8\uc744 \uc54c\ub824\ub4dc\ub9bd\ub2c8\ub2e4.<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">dataset = pd.read_csv('.\/fitness.csv')\ndataset.loc[ dataset['age'] &lt; 40, 'ACODE']= 0\ndataset.loc[ (dataset['age'] >= 40) &amp; (dataset['age'] &lt;50), 'ACODE']= 1\ndataset.loc[ (dataset['age'] >= 50) &amp; (dataset['age'] &lt;60), 'ACODE']= 2\ndataset['ACODE'] = dataset['ACODE'].astype('int32')\ndataset.info()<\/pre>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">&lt;class 'pandas.core.frame.DataFrame'>\nRangeIndex: 31 entries, 0 to 30\nData columns (total 8 columns):\n #   Column    Non-Null Count  Dtype  \n---  ------    --------------  -----  \n 0   age       31 non-null     int64  \n 1   weight    31 non-null     float64\n 2   oxygen    31 non-null     float64\n 3   runtime   31 non-null     float64\n 4   runpulse  31 non-null     int64  \n 5   rstpulse  31 non-null     int64  \n 6   maxpulse  31 non-null     int64  \n 7   ACODE     31 non-null     int32  \ndtypes: float64(3), int32(1), int64(4)\nmemory usage: 
1.9 KB<\/pre>\n\n\n\n<p>sklearn \ud328\ud0a4\uc9c0\uc5d0\uc11c KMeans \ud074\ub798\uc2a4\ub97c \uc784\ud3ec\ud2b8\ud569\ub2c8\ub2e4. \uc0ac\uc6a9\ud560 \ub54c\uc5d0 \uc6b0\ub9ac\ub294 cluster\uac00 3\uac1c\ub77c\ub294 \uac83\uc744 \uc54c\uace0 \uc788\uae30 \ub54c\ubb38\uc5d0 n_clusters=3\uc73c\ub85c \uc124\uc815\ud574\uc90d\ub2c8\ub2e4. \ud574\ub2f9 \uc54c\uace0\ub9ac\uc998\uc5d0\uc11c \uac00\uc7a5 \uc911\uc694\ud55c \ubd80\ubd84\uc774 centroid\ub97c \ucd08\uae30\uc5d0 \uc5b4\ub5bb\uac8c \uc124\uc815\ud560\uac83\uc778\uac00\uc5d0 \uc758\ud55c \uac83\uc778\ub370 init\uc744 \uc815\ud574\uc8fc\uc9c0 \uc54a\ub294\ub2e4\uba74 \uae30\ubcf8\uc801\uc73c\ub85c k-means++ \ubc29\ubc95\uc744 \uc0ac\uc6a9\ud569\ub2c8\ub2e4. n_init\ub294 30\ud68c \uc911\uc559\uac12\uc744 \uac01\uae30 \ub2e4\ub978 \ud3ec\uc778\ud2b8\ub85c \uc124\uc815\ud574\uc8fc\uace0 \uadf8\uc911\uc5d0 \uac00\uc7a5 best \uac12\uc744 \ud65c\uc6a9\ud569\ub2c8\ub2e4. \uc774 \uc678\uc5d0\ub3c4 \ub2e4\uc591\ud55c \ud30c\ub77c\uba54\ud130\uac00 \uc874\uc7ac\ud558\ub2c8 \uacf5\uc2dd \ud648\ud398\uc774\uc9c0\ub97c \uc0b4\ud3b4\ubcf4\uc2dc\ub294 \uac83\uc744 \ucd94\ucc9c\ud569\ub2c8\ub2e4.<\/p>\n\n\n\n<p><a href=\"https:\/\/scikit-learn.org\/stable\/modules\/generated\/sklearn.cluster.KMeans.html\">https:\/\/scikit-learn.org\/stable\/modules\/generated\/sklearn.cluster.KMeans.html<\/a><\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">from sklearn.cluster import KMeans \nkmeans = KMeans(n_clusters=3, n_init=30,)\nkmeans.fit(dataset[dataset.columns[:-1]]) # ACODE \uc81c\uc678\ndataset['ACODE2'] = kmeans.labels_<\/pre>\n\n\n\n<p>\uc608\uce21\uacb0\uacfc\ub97c ACODE2\ub77c\ub294 \uceec\ub7fc\uc5d0 \ub370\uc774\ud130\ub97c \ucd94\uac00\ud574\ubd05\ub2c8\ub2e4.<\/p>\n\n\n\n<div class=\"wp-block-image\"><figure class=\"aligncenter size-large\"><img 
loading=\"lazy\" width=\"559\" height=\"170\" src=\"http:\/\/cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/\u1109\u1173\u110f\u1173\u1105\u1175\u11ab\u1109\u1163\u11ba-2020-12-22-\u110b\u1169\u1112\u116e-7.01.38.png\" alt=\"\" class=\"wp-image-726\" srcset=\"http:\/\/blog.cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/\u1109\u1173\u110f\u1173\u1105\u1175\u11ab\u1109\u1163\u11ba-2020-12-22-\u110b\u1169\u1112\u116e-7.01.38.png 559w, http:\/\/blog.cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/\u1109\u1173\u110f\u1173\u1105\u1175\u11ab\u1109\u1163\u11ba-2020-12-22-\u110b\u1169\u1112\u116e-7.01.38-300x91.png 300w\" sizes=\"(max-width: 559px) 100vw, 559px\" \/><\/figure><\/div>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">(dataset['ACODE'].values == dataset['ACODE2'].values).sum()\/len(dataset)\n# 0.3225806451612903<\/pre>\n\n\n\n<p>\uc608\uc81c\ub85c \uc0ac\uc6a9\ud588\ub358 \ub370\uc774\ud130\uc758 \uacbd\uc6b0\ub294 \uc0ac\uc804\uc5d0 3\uac1c\uc758 \ud074\ub798\uc2a4\ub97c \uc54c\uace0 \uc788\uc5c8\uc9c0\ub9cc \ube44\uc9c0\ub3c4 \ud559\uc2b5\uc740 \uc774\uc5d0 \ub300\ud55c \uc815\ubcf4\uac00 \uc8fc\uc5b4\uc9c0\uc9c0 \uc54a\uae30 \ub54c\ubb38\uc5d0 \uc5b4\ub5bb\uac8c \uad70\uc9d1\uc744 \ub9cc\ub4dc\ub294 \uac83\uc774 \uac00\uc7a5 \uc88b\uc740 \ucf00\uc774\uc2a4\uc778\uc9c0 \uc54c \uc218 \uc5c6\ub294 \uacbd\uc6b0\uac00 \ub300\ubd80\ubd84\uc785\ub2c8\ub2e4. \uadf8\ub7f4 \uacbd\uc6b0 \uc544\ub798\uc640 \uac19\uc740 \ubc29\uc2dd\uc73c\ub85c num_cluster\ub97c \uccb4\ud06c\ud574\ubcfc \ud544\uc694\uac00 \uc788\uc2b5\ub2c8\ub2e4. \uadf8\ub9bc5\uc758 \uacbd\uc6b0\ub294 num_cluster\ub97c \uc815\ud558\uae30\uac00 \uc5b4\ub835\uc2b5\ub2c8\ub2e4. 
\uadf8 \uc774\uc720\ub294 \ub370\uc774\ud130\uac00 \uad70\uc9d1\ud558\uae30 \uc5b4\ub824\uc6b4 \ud615\ud0dc\ub85c \ubd84\uc0b0\ub418\uc5b4 \uc788\uae30 \ub54c\ubb38\uc785\ub2c8\ub2e4. \uad73\uc774 \ud55c\ub2e4\uba74 4,5\uc815\ub3c4\uac00 \uc88b\uc744 \ub4ef\ud569\ub2c8\ub2e4.<\/p>\n\n\n\n<p>\ubc18\uba74 \uadf8\ub9bc6\uc740 \ud074\ub7ec\uc2a4\ud130\uc758 \uac2f\uc218\uac00 \uba85\ud655\ud569\ub2c8\ub2e4. 3\uac1c \uc815\ub3c4\uac00 \uac00\uc7a5 \uc88b\uc740 \ucf00\uc774\uc2a4\ub77c\uace0 \uc5ec\uc9d1\ub2c8\ub2e4. \uc774\uac83 \uc5ed\uc2dc \ub370\uc774\ud130\uc5d0 \ub530\ub77c\uc11c \ucc28\uc774\uac00 \uc788\uae30 \ub54c\ubb38\uc5d0 \uc0ac\uc804\uc5d0 \ud655\uc778\uc744 \ud574\ubcf4\ub294 \uac83\ub3c4 \uc88b\uc740 \ubc29\ubc95\uc785\ub2c8\ub2e4.<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">num_cluster = range(1,10)\ninertia_ = []\n\nfor c in num_cluster:\n    model = KMeans(n_clusters=c)\n    model.fit(dataset[dataset.columns[:-2]])\n    inertia_.append(model.inertia_)\n\n# Plot ks vs inertias\nplt.plot(num_cluster, inertia_, '-o')\nplt.xlabel('number of clusters, k')\nplt.ylabel('inertia_')\nplt.xticks(num_cluster)\nplt.show()<\/pre>\n\n\n\n<div class=\"wp-block-image\"><figure class=\"aligncenter size-large\"><img loading=\"lazy\" width=\"402\" height=\"262\" src=\"http:\/\/cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/download-3-1.png\" alt=\"\" class=\"wp-image-727\" srcset=\"http:\/\/blog.cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/download-3-1.png 402w, http:\/\/blog.cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/download-3-1-300x196.png 300w\" sizes=\"(max-width: 402px) 100vw, 402px\" \/><figcaption>\uadf8\ub9bc 5 num_cluster<\/figcaption><\/figure><\/div>\n\n\n\n<div class=\"wp-block-image\"><figure class=\"aligncenter size-large\"><img 
loading=\"lazy\" width=\"402\" height=\"262\" src=\"http:\/\/cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/download-9.png\" alt=\"\" class=\"wp-image-735\" srcset=\"http:\/\/blog.cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/download-9.png 402w, http:\/\/blog.cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/download-9-300x196.png 300w\" sizes=\"(max-width: 402px) 100vw, 402px\" \/><figcaption>\uadf8\ub9bc 6. num_cluster<\/figcaption><\/figure><\/div>\n\n\n\n<p>\uadf8\ub9bc 6\uc740 health.csv \ub370\uc774\ud130\uc14b\uc758 \uc8fc\uc131\ubd84\ubd84\uc11d\uc73c\ub85c \ub098\ud0c0\ub0b8 \uadf8\ub9bc\uc785\ub2c8\ub2e4. \ud655\uc778 \uacb0\uacfc \ud074\ub7ec\uc2a4\ud130\ub9c1\uc5d0\ub294 \uc801\uc808\uce58 \uc54a\uc740 \ub370\uc774\ud130\ub85c \ud655\uc778\ub418\ub124\uc694. \uc544\ub9c8\ub3c4 \ub370\uc774\ud130\uc758 \uc218\uac00 \ub9ce\uc9c0 \uc54a\uae30 \ub54c\ubb38\uc774\ub77c\uace0 \uc0dd\uac01\ub429\ub2c8\ub2e4. <\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">from sklearn.decomposition import PCA\npca = PCA(n_components=2)\ndim2 = pca.fit_transform(dataset[dataset.columns[:-2]])\nplt.scatter(dim2[:,0], dim2[:,1], c=dataset['ACODE'].values )<\/pre>\n\n\n\n<div class=\"wp-block-image\"><figure class=\"aligncenter size-large\"><img loading=\"lazy\" width=\"377\" height=\"248\" src=\"http:\/\/cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/download-4-2.png\" alt=\"\" class=\"wp-image-728\" srcset=\"http:\/\/blog.cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/download-4-2.png 377w, http:\/\/blog.cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/download-4-2-300x197.png 300w\" sizes=\"(max-width: 377px) 100vw, 377px\" \/><figcaption>\uadf8\ub9bc 6. 
health cluster<\/figcaption><\/figure><\/div>\n\n\n\n<p>\uc704\uc640 \uac19\uc774 k-Means\uac00 \ud56d\uc0c1 \uc88b\uc740 \uacb0\uacfc\ub97c \ub0bc \uc218 \uc788\ub294 \uac83\uc740 \uc544\ub2d9\ub2c8\ub2e4. <br>\uac01\uac01\uc758 \ub370\uc774\ud130\uac00 \uc798 \ubaa8\uc5ec\uc838\uc788\ub2e4\uba74 \uc88b\uc740 \uacb0\uacfc\ub97c \ub0b4\uc9c0\ub9cc \uadf8\ub807\uc9c0 \uc54a\uace0 \uadf8\ub9bc6\uacfc \uac19\uc774 \uad70\uc9d1\uc774 \uc57d\ud560 \uacbd\uc6b0\ub294 \uc88b\uc740 \uacb0\uacfc\ub97c \uc5bb\uc744 \uc218 \uc5c6\uc2b5\ub2c8\ub2e4. <\/p>\n\n\n\n<div class=\"wp-block-image\"><figure class=\"aligncenter size-large\"><img loading=\"lazy\" width=\"380\" height=\"248\" src=\"http:\/\/cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/download-10.png\" alt=\"\" class=\"wp-image-748\" srcset=\"http:\/\/blog.cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/download-10.png 380w, http:\/\/blog.cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/download-10-300x196.png 300w\" sizes=\"(max-width: 380px) 100vw, 380px\" \/><figcaption>\uadf8\ub9bc 7<\/figcaption><\/figure><\/div>\n\n\n\n<div class=\"wp-block-image\"><figure class=\"aligncenter size-large\"><img loading=\"lazy\" width=\"380\" height=\"248\" src=\"http:\/\/cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/download-1-4.png\" alt=\"\" class=\"wp-image-749\" srcset=\"http:\/\/blog.cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/download-1-4.png 380w, http:\/\/blog.cedartrees.co.kr\/wp-content\/uploads\/2020\/12\/download-1-4-300x196.png 300w\" sizes=\"(max-width: 380px) 100vw, 380px\" \/><figcaption>\uadf8\ub9bc 8<\/figcaption><\/figure><\/div>\n\n\n\n<p>K-Means\uac00 \uc88b\uc740 \uacb0\uacfc\ub97c \uc5bb\uc9c0 \ubabb\ud558\ub294 \uacbd\uc6b0\ub294 \uadf8\ub9bc7\uacfc \uac19\uc740 \ud615\ud0dc\uc758 \ub370\uc774\ud130\uc77c \uacbd\uc6b0\ub3c4 \uc88b\uc740 \uc131\ub2a5\uc744 \ubc1c\ud718\ud560 \uc218 \uc5c6\uc2b5\ub2c8\ub2e4. 
K-Means\ub294 \ud074\ub7ec\uc2a4\ud130\uc758 \ubc29\ud5a5\uc131\uc744 \uace0\ub824\ud558\uc9c0 \uc54a\uace0 \ubb34\uc870\uac74 \uac70\ub9ac\uac00 \uac00\uae4c\uc6b4 \ub370\uc774\ud130\ub97c \ud074\ub7ec\uc2a4\ud130\ub85c \ubb36\uc5b4\uc8fc\uae30 \ub54c\ubb38\uc5d0 \uc704\uc640 \uac19\uc740 \ud615\ud0dc\uc758 \ub370\uc774\ud130\ub294 \uc798 \ubc18\uc601\ud558\uc9c0 \ubabb\ud569\ub2c8\ub2e4. \uadf8\ub9bc\uc5d0\uc11c\uc640 \uac19\uc774 \ud074\ub7ec\uc2a4\ud130\ub97c 3\uac1c\ub85c \ubd84\ub958\ud588\uc9c0\ub9cc \ud55c\ub208\uc5d0 \ubcf4\uae30\uc5d0\ub3c4 \uc88b\uc740 \ubd84\ub958\uac00 \uc544\ub2d8\uc744 \ud655\uc778\ud560 \uc218 \uc788\uc2b5\ub2c8\ub2e4.<\/p>\n\n\n\n<p>\uc774\uc640 \ub9c8\ucc2c\uac00\uc9c0\ub85c \uadf8\ub9bc8\ub3c4 \uc5ed\uc2dc \uc88b\uc740 \uacb0\uacfc\ub97c \uc5bb\uc744 \uc218 \uc5c6\uc2b5\ub2c8\ub2e4.<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\ud559\uc2b5\uc758 \uc885\ub958\uc5d0\ub294 \ub808\uc774\ube14 \ub370\uc774\ud130\uc758 \ud3ec\ud568 \uc5ec\ubd80\uc5d0 \ub530\ub77c\uc11c \uc9c0\ub3c4\ud559\uc2b5(Supervised Learning)\uacfc \ube44\uc9c0\ub3c4\ud559\uc2b5(Unsupervised Learning)\uc73c\ub85c \ub098\ub20c \uc218 \uc788\uc2b5\ub2c8\ub2e4. \uc9c0\ub3c4\ud559\uc2b5 \uc54c\uace0\ub9ac\uc998\uc740 \uc885\uc18d\ubcc0\uc218\uc758 \ud2b9\uc9d5\uc5d0 \ub530\ub77c\uc11c \ubd84\ub958(Classification)\uc640 \ud68c\uadc0(Regression)\ub85c \ub098\ub20c \uc218 \uc788\uc2b5\ub2c8\ub2e4.\ubc18\uba74 \ube44\uc9c0\ub3c4 \ud559\uc2b5\uc740 \ub808\uc774\ube14 \ub370\uc774\ud130\uac00 \uc5c6\ub294 \ud615\ud0dc\uc758 \ub370\uc774\ud130\ub85c \ud074\ub7ec\uc2a4\ud130\ub9c1(Clustering)\uc774 \uac00\uc7a5 \ub300\ud45c\uc801\uc778 \uae30\ubc95\uc785\ub2c8\ub2e4. \ud559\uc2b5 \ub370\uc774\ud130\uc758 Feature\ub97c \ud30c\uc545\ud574\uc11c \ud2b9\uc9d5\uc744 \ucd94\ucd9c\ud558\uace0 \uc774\ub97c \ud1b5\ud574\uc11c \uacf5\ud1b5\uc810\uc774 \uc788\ub294 \ub370\uc774\ud130\ub97c \ubb36\uc5b4\uc8fc\ub294 \ud615\ud0dc\uc758 \ubd84\uc11d\uae30\ubc95\uc785\ub2c8\ub2e4. 
\uc608\ub97c \ub4e4\uc5b4\uc11c \ud074\ub7ec\uc2a4\ud130\ub9c1 \uae30\ubc95\uc744 \ud65c\uc6a9\ud574\uc11c \uace0\uac1d\uc758 &hellip; <\/p>\n<p class=\"link-more\"><a href=\"http:\/\/blog.cedartrees.co.kr\/index.php\/2020\/12\/22\/k-means-cluster\/\" class=\"more-link\">\ub354 \ubcf4\uae30<span class=\"screen-reader-text\"> &#8220;K-Means \ud074\ub7ec\uc2a4\ud130&#8221;<\/span><\/a><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[17,70],"tags":[69,54,48,45,55],"_links":{"self":[{"href":"http:\/\/blog.cedartrees.co.kr\/index.php\/wp-json\/wp\/v2\/posts\/720"}],"collection":[{"href":"http:\/\/blog.cedartrees.co.kr\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/blog.cedartrees.co.kr\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/blog.cedartrees.co.kr\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/blog.cedartrees.co.kr\/index.php\/wp-json\/wp\/v2\/comments?post=720"}],"version-history":[{"count":6,"href":"http:\/\/blog.cedartrees.co.kr\/index.php\/wp-json\/wp\/v2\/posts\/720\/revisions"}],"predecessor-version":[{"id":750,"href":"http:\/\/blog.cedartrees.co.kr\/index.php\/wp-json\/wp\/v2\/posts\/720\/revisions\/750"}],"wp:attachment":[{"href":"http:\/\/blog.cedartrees.co.kr\/index.php\/wp-json\/wp\/v2\/media?parent=720"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/blog.cedartrees.co.kr\/index.php\/wp-json\/wp\/v2\/categories?post=720"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/blog.cedartrees.co.kr\/index.php\/wp-json\/wp\/v2\/tags?post=720"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}