Basic usage of the package.
First, let’s simulate 5 clusters of 100 two-dimensional points each, normally distributed around the centers 1 to 5 with a standard deviation of 0.3:
## id V1 V2 true_clust
## 1 1 0.3767173 0.9473130 1
## 2 2 0.7771952 0.4365945 1
## 3 3 1.2677965 1.3221844 1
## 4 4 0.8669975 1.3151264 1
## 5 5 1.2724007 0.8288909 1
## 6 6 1.1475845 0.7452437 1
## 7 7 0.9039467 0.5520469 1
## 8 8 0.7665078 0.9444070 1
## 9 9 1.0253056 0.7001926 1
## 10 10 0.8420768 0.7252489 1
## 11 11 1.0759952 1.0163106 1
## 12 12 0.7525193 0.7865068 1
## 13 13 0.7875251 1.3497340 1
## 14 14 0.6228121 0.9805805 1
## 15 15 0.6944935 1.2208939 1
## 16 16 1.2291597 1.9791167 1
## 17 17 1.0850682 1.4320827 1
## 18 18 0.9153876 0.9637182 1
## 19 19 1.7361625 1.0328868 1
## 20 20 1.0315178 1.1156138 1
## 21 21 0.8028178 1.1313384 1
## 22 22 1.3445547 1.2253213 1
## 23 23 1.1422975 1.0645733 1
## 24 24 1.2113962 0.8812399 1
## 25 25 1.1844728 1.0250569 1
## 26 26 0.2466337 1.4776891 1
## 27 27 1.6871284 1.2548285 1
## 28 28 0.8059647 1.0578151 1
## 29 29 0.7271369 0.5993674 1
## 30 30 1.0879096 0.7155586 1
## 31 31 1.1210407 1.8089211 1
## 32 32 1.2089941 1.1504575 1
## 33 33 0.8216845 1.2357720 1
## 34 34 1.4522292 0.7543033 1
## 35 35 1.5830919 0.4371217 1
## 36 36 1.0334194 0.5317554 1
## 37 37 0.9241629 1.2746520 1
## 38 38 0.8943483 0.8673550 1
## 39 39 1.0867575 1.3316706 1
## 40 40 0.5684744 1.0766575 1
## 41 41 0.9817616 1.2313768 1
## 42 42 0.6929054 1.2364633 1
## 43 43 0.5943770 1.0941386 1
## 44 44 1.6692845 1.1624366 1
## 45 45 0.9772522 1.1011560 1
## 46 46 0.5720378 1.2177179 1
## 47 47 0.9264959 1.1485001 1
## 48 48 0.4087713 1.0886696 1
## 49 49 0.7916948 1.3920948 1
## 50 50 0.4612196 1.1451793 1
## 51 51 1.0086479 1.0011060 1
## 52 52 1.0070108 0.9573810 1
## 53 53 1.2425391 1.0304969 1
## 54 54 1.3535816 0.7065712 1
## 55 55 1.4744849 0.4952795 1
## 56 56 1.2229512 0.9660392 1
## 57 57 1.3371796 0.8339075 1
## 58 58 1.0247336 0.4536824 1
## 59 59 0.7775405 1.1490041 1
## 60 60 1.1150210 1.0720482 1
## 61 61 0.7466844 0.8542125 1
## 62 62 1.1955397 1.5219069 1
## 63 63 0.8979006 0.6467794 1
## 64 64 1.2014007 1.0265304 1
## 65 65 0.1574428 1.2432242 1
## 66 66 0.7744559 1.0709800 1
## 67 67 0.8238785 0.8478476 1
## 68 68 0.9416013 0.7349223 1
## 69 69 0.9033257 0.8141168 1
## 70 70 0.4424237 0.5838100 1
## 71 71 0.8096776 1.2962844 1
## 72 72 1.3233941 1.0144875 1
## 73 73 0.8031583 1.6319976 1
## 74 74 1.3320322 1.1116487 1
## 75 75 1.1837934 0.8682858 1
## 76 76 1.6209222 0.3178315 1
## 77 77 0.9031032 0.9164385 1
## 78 78 0.8954044 1.4752830 1
## 79 79 0.6614979 0.6881379 1
## 80 80 0.9894612 1.0304082 1
## 81 81 0.6459521 0.8205572 1
## 82 82 1.2505716 1.0873547 1
## 83 83 1.1761018 1.0517744 1
## 84 84 1.5851996 0.9387091 1
## 85 85 0.9956294 0.8673277 1
## 86 86 1.2387546 1.2683582 1
## 87 87 0.6537069 0.7549864 1
## 88 88 1.3048125 0.6787272 1
## 89 89 0.9281717 0.9858927 1
## 90 90 1.3200650 0.9758446 1
## 91 91 1.4952188 1.0709705 1
## 92 92 1.1864362 0.9516226 1
## 93 93 0.9691448 1.5406666 1
## 94 94 0.9871847 1.0048342 1
## 95 95 0.8256279 1.0609859 1
## 96 96 1.3328611 0.9271259 1
## 97 97 1.3349069 1.0202728 1
## 98 98 0.7626447 1.1762738 1
## 99 99 0.7441846 0.8901470 1
## 100 100 1.0280014 1.4761237 1
## 101 101 1.7240498 2.1917667 2
## 102 102 2.1477589 2.2146340 2
## 103 103 1.7027469 2.0213280 2
## 104 104 2.1463023 2.0638567 2
## 105 105 2.4073865 2.2785103 2
## 106 106 2.1025445 1.3996684 2
## 107 107 2.3094964 1.8137042 2
## 108 108 1.5054438 1.8284215 2
## 109 109 1.2628418 1.7275650 2
## 110 110 1.8729456 1.7514444 2
## 111 111 2.2711728 2.0103012 2
## 112 112 1.8110873 2.0544719 2
## 113 113 1.4373040 2.5330542 2
## 114 114 2.3472602 2.5732635 2
## 115 115 1.8571581 1.3811372 2
## 116 116 1.7594145 1.9725059 2
## 117 117 2.1250675 2.3913515 2
## 118 118 2.2114631 1.8171498 2
## 119 119 1.9167138 2.0340057 2
## 120 120 1.7994075 1.8193712 2
## 121 121 1.8916990 2.0483636 2
## 122 122 1.7157171 2.3094967 2
## 123 123 1.7790713 2.4263179 2
## 124 124 2.2918029 1.0859611 2
## 125 125 1.9785292 2.3038684 2
## 126 126 2.0330563 1.6736483 2
## 127 127 2.8201623 2.2324056 2
## 128 128 2.0486865 1.7985402 2
## 129 129 1.5673919 1.5444837 2
## 130 130 2.2475204 1.3704727 2
## 131 131 2.1664586 2.1267505 2
## 132 132 2.1036168 1.8198495 2
## 133 133 2.0654851 1.6671382 2
## 134 134 1.9945100 1.9456569 2
## 135 135 1.8337013 2.0862003 2
## 136 136 2.3137056 1.6447256 2
## 137 137 1.5789334 1.4890468 2
## 138 138 2.2722327 2.0891428 2
## 139 139 2.3241516 2.0692744 2
## 140 140 2.1932210 2.1691581 2
## 141 141 2.0636184 1.7113753 2
## 142 142 2.6543953 2.2280597 2
## 143 143 1.9986698 1.8805143 2
## 144 144 1.9022112 1.8386501 2
## 145 145 2.0550513 2.2096079 2
## 146 146 1.8822028 2.2294607 2
## 147 147 2.0534039 2.6261139 2
## 148 148 2.1858917 2.5393751 2
## 149 149 2.2581082 1.6637601 2
## 150 150 1.6671152 2.0051653 2
## 151 151 2.0608000 1.7146325 2
## 152 152 2.1933840 2.0572412 2
## 153 153 1.7489826 1.7053830 2
## 154 154 1.9614406 2.0093789 2
## 155 155 2.4251870 2.1460354 2
## 156 156 2.1578004 1.3326680 2
## 157 157 1.8280531 1.9106273 2
## 158 158 2.1618254 2.2173712 2
## 159 159 2.4764937 2.3472773 2
## 160 160 1.5468318 2.3676396 2
## 161 161 1.8788354 2.4492316 2
## 162 162 1.6990724 2.0442601 2
## 163 163 2.2534504 1.9472414 2
## 164 164 2.1236685 2.1272375 2
## 165 165 2.2178681 2.6039467 2
## 166 166 1.9941017 1.8509873 2
## 167 167 1.8064223 1.8947630 2
## 168 168 2.1018646 1.5397721 2
## 169 169 2.1232171 2.1360815 2
## 170 170 2.0092199 2.1201713 2
## 171 171 1.6443690 1.9995858 2
## 172 172 1.6683686 2.5353654 2
## 173 173 1.4272309 1.9921655 2
## 174 174 1.3661323 2.3628861 2
## 175 175 2.0740349 2.5421528 2
## 176 176 2.0555251 1.4521124 2
## 177 177 1.6892600 1.4476947 2
## 178 178 1.6147121 2.0013555 2
## 179 179 1.3534581 1.8987578 2
## 180 180 2.3528521 1.9627606 2
## 181 181 1.6662967 2.0770224 2
## 182 182 2.3474273 1.4765016 2
## 183 183 1.5646400 1.5292227 2
## 184 184 1.9124392 1.8891854 2
## 185 185 1.9598714 2.0247285 2
## 186 186 2.0877701 1.9065968 2
## 187 187 2.2941633 1.8008229 2
## 188 188 2.3978545 2.1953938 2
## 189 189 2.0319868 2.5153169 2
## 190 190 1.7046119 1.7643745 2
## 191 191 1.6138429 1.6632780 2
## 192 192 2.0982337 2.1636975 2
## 193 193 2.5543183 2.2031899 2
## 194 194 1.8657742 2.7916297 2
## 195 195 2.0088602 1.7120612 2
## 196 196 1.8446500 2.1530686 2
## 197 197 1.7581656 2.3675160 2
## 198 198 1.7663722 2.1299673 2
## 199 199 2.1242548 1.8132569 2
## 200 200 2.0498030 2.2659930 2
## 201 201 3.4014600 3.5878311 3
## 202 202 3.6374637 2.8851439 3
## 203 203 3.3119046 3.4016763 3
## 204 204 3.2398718 3.1279438 3
## 205 205 2.9069991 3.1139049 3
## 206 206 3.6286900 3.1263548 3
## 207 207 3.0073556 2.9962502 3
## 208 208 3.1202868 3.1757186 3
## 209 209 3.1378234 2.6987379 3
## 210 210 2.9299413 2.9252239 3
## 211 211 2.5500608 2.8270395 3
## 212 212 3.1046580 2.7386459 3
## 213 213 3.4140302 2.9988731 3
## 214 214 2.8560988 3.3510093 3
## 215 215 3.3315660 2.5843607 3
## 216 216 3.0761561 2.6183031 3
## 217 217 3.4324299 2.6947791 3
## 218 218 2.5208764 2.9306676 3
## 219 219 3.0635919 2.6676517 3
## 220 220 2.8867122 2.7539662 3
## 221 221 3.4006310 2.8754211 3
## 222 222 2.6780094 3.2825047 3
## 223 223 2.6108648 2.6799331 3
## 224 224 2.8211712 3.2369790 3
## 225 225 2.7514521 2.6151687 3
## 226 226 3.0444776 2.6842558 3
## 227 227 3.1433062 3.1563466 3
## 228 228 2.7150084 2.9706850 3
## 229 229 3.0973033 2.7993101 3
## 230 230 2.8818278 2.8024633 3
## 231 231 3.0127776 2.9495391 3
## 232 232 2.9490148 3.0095152 3
## 233 233 3.1147094 3.2221070 3
## 234 234 3.0645069 3.1312490 3
## 235 235 2.9944222 3.0712954 3
## 236 236 3.3457507 3.3244889 3
## 237 237 2.8515466 2.4184105 3
## 238 238 3.0735926 3.1414431 3
## 239 239 3.2692307 3.0904445 3
## 240 240 2.8705738 3.2510183 3
## 241 241 3.2456386 2.6809704 3
## 242 242 3.4209469 3.1657808 3
## 243 243 2.4598513 2.8037340 3
## 244 244 3.0575639 2.7395245 3
## 245 245 3.0764313 3.4775483 3
## 246 246 3.1290985 3.0854845 3
## 247 247 3.2054891 3.3187393 3
## 248 248 3.0558586 2.7255177 3
## 249 249 3.1223712 2.5304063 3
## 250 250 3.0308103 3.2224918 3
## 251 251 3.2480612 3.3634026 3
## 252 252 3.1948875 3.3226132 3
## 253 253 3.1150878 2.6371873 3
## 254 254 2.9311233 2.6060601 3
## 255 255 2.8559950 3.2117923 3
## 256 256 3.1819779 3.1301060 3
## 257 257 3.1573751 2.7412442 3
## 258 258 3.1382019 2.7441654 3
## 259 259 2.9298448 2.9606736 3
## 260 260 2.9816983 2.7416725 3
## 261 261 3.2356440 2.2047582 3
## 262 262 2.8964914 2.9847514 3
## 263 263 2.9328276 3.1378569 3
## 264 264 2.8377260 2.9081859 3
## 265 265 2.6228130 2.6790083 3
## 266 266 3.0542829 3.2467586 3
## 267 267 2.2097800 2.9995353 3
## 268 268 3.1322535 2.4982152 3
## 269 269 2.4058656 2.3843438 3
## 270 270 2.7437235 3.2251702 3
## 271 271 3.1755176 2.6126634 3
## 272 272 3.4758594 3.4672082 3
## 273 273 3.2354723 2.7545875 3
## 274 274 3.1234780 3.0544978 3
## 275 275 3.3764887 2.5237407 3
## 276 276 2.7216100 2.5560206 3
## 277 277 2.8507435 3.0570732 3
## 278 278 2.7876514 2.8955042 3
## 279 279 2.9982702 3.7034991 3
## 280 280 2.7697049 3.1549488 3
## 281 281 3.3192365 2.9823753 3
## 282 282 2.9201597 3.0979474 3
## 283 283 3.5827237 3.1819142 3
## 284 284 3.0160687 2.5572207 3
## 285 285 2.2966058 2.8007284 3
## 286 286 2.6834115 3.2508303 3
## 287 287 2.9341241 2.8462358 3
## 288 288 3.3275874 2.9343699 3
## 289 289 2.9847950 3.6038591 3
## 290 290 2.6611595 2.8693601 3
## 291 291 2.9556805 2.9651621 3
## 292 292 3.0763826 3.0402241 3
## 293 293 2.8864817 3.0387177 3
## 294 294 3.0610897 3.4376058 3
## 295 295 3.3142024 2.9601696 3
## 296 296 2.3653202 3.7197664 3
## 297 297 2.9514580 2.9151938 3
## 298 298 3.2201968 2.6798721 3
## 299 299 3.0463166 2.6714989 3
## 300 300 3.1729396 3.0031066 3
## 301 301 5.0658976 4.1118427 4
## 302 302 4.3960411 4.4339442 4
## 303 303 4.1868534 4.1215591 4
## 304 304 4.1132864 3.8713248 4
## 305 305 3.8033336 4.0169877 4
## 306 306 4.1373131 3.7863779 4
## 307 307 3.4601896 4.3591492 4
## 308 308 4.4199619 3.8747844 4
## 309 309 3.7011369 4.0371309 4
## 310 310 3.8334790 3.6505311 4
## 311 311 3.8187098 3.8243625 4
## 312 312 3.5109187 3.4985160 4
## 313 313 3.6193299 3.8202263 4
## 314 314 4.2024148 4.0312200 4
## 315 315 3.7779494 3.7427711 4
## 316 316 3.7654909 4.2355863 4
## 317 317 4.1164779 3.7456796 4
## 318 318 3.4877721 4.0340104 4
## 319 319 3.5040767 3.9333184 4
## 320 320 4.0742307 4.3381556 4
## 321 321 3.6800416 3.9583730 4
## 322 322 4.2599479 3.6184553 4
## 323 323 3.6916683 4.2870491 4
## 324 324 4.2988735 3.9259953 4
## 325 325 3.6510664 4.2109721 4
## 326 326 3.7882583 3.9926424 4
## 327 327 4.1631527 4.3584797 4
## 328 328 4.2291454 4.1269821 4
## 329 329 4.1560941 3.9657191 4
## 330 330 3.9100256 4.2951495 4
## 331 331 4.1236208 3.8895305 4
## 332 332 3.8239899 4.1661117 4
## 333 333 3.3083151 3.4777662 4
## 334 334 4.3415230 4.1187053 4
## 335 335 3.9312143 4.0712408 4
## 336 336 3.8007796 3.8484348 4
## 337 337 3.9569270 3.4701330 4
## 338 338 3.2766816 3.9932455 4
## 339 339 3.7546550 4.6039884 4
## 340 340 3.6065517 4.3826314 4
## 341 341 4.4218779 3.9231241 4
## 342 342 4.6569220 4.1839227 4
## 343 343 4.0421375 4.1250092 4
## 344 344 4.0905590 3.6298293 4
## 345 345 3.7393145 4.0518412 4
## 346 346 3.6922708 4.0118505 4
## 347 347 3.2629506 3.6010818 4
## 348 348 3.9450430 3.6383833 4
## 349 349 3.6144095 3.9902396 4
## 350 350 3.8335546 4.0751249 4
## 351 351 3.0365398 3.9707823 4
## 352 352 3.5990194 4.5702123 4
## 353 353 4.0892900 3.6139801 4
## 354 354 3.9864916 3.8627303 4
## 355 355 4.0131637 3.8038534 4
## 356 356 4.0252756 4.4329086 4
## 357 357 4.4190276 3.7876161 4
## 358 358 4.1748940 4.2161852 4
## 359 359 3.6860696 3.7491322 4
## 360 360 4.1050336 3.8821676 4
## 361 361 3.6214095 3.9572185 4
## 362 362 3.7729619 4.2351023 4
## 363 363 4.0053699 3.9977893 4
## 364 364 4.2968411 4.1106944 4
## 365 365 4.3472787 3.9834814 4
## 366 366 4.2006914 3.7829213 4
## 367 367 3.9609321 4.5616079 4
## 368 368 4.0589061 4.1490892 4
## 369 369 4.3897579 4.0635033 4
## 370 370 3.8204380 3.5928239 4
## 371 371 4.3655069 4.4265610 4
## 372 372 4.0523961 4.0961256 4
## 373 373 3.6651531 4.1207548 4
## 374 374 3.8623737 4.0098867 4
## 375 375 3.7550230 3.4420118 4
## 376 376 3.7677814 4.5335275 4
## 377 377 4.2884102 3.9644200 4
## 378 378 3.9414273 3.9272499 4
## 379 379 3.5483790 4.6100401 4
## 380 380 4.3476525 3.9796773 4
## 381 381 4.1418682 4.3316494 4
## 382 382 4.1430704 3.8302312 4
## 383 383 4.4167253 4.2131191 4
## 384 384 4.2517418 3.8564708 4
## 385 385 4.2964473 4.3320663 4
## 386 386 3.9881072 4.0797791 4
## 387 387 4.0790616 3.9764928 4
## 388 388 4.2799163 4.5571738 4
## 389 389 3.8732943 4.6022994 4
## 390 390 3.8629210 4.2231740 4
## 391 391 3.3642462 4.5828260 4
## 392 392 3.2886044 3.0858300 4
## 393 393 3.8766071 4.4275967 4
## 394 394 4.1774254 3.9102585 4
## 395 395 3.7310281 4.2649693 4
## 396 396 3.8667199 3.8195696 4
## 397 397 4.4789976 4.0482910 4
## 398 398 3.9797508 3.6611005 4
## 399 399 4.6344643 3.9675443 4
## 400 400 4.1275862 3.4571791 4
## 401 401 4.9528079 5.2055450 5
## 402 402 5.0878482 5.0924140 5
## 403 403 4.6741911 5.1163494 5
## 404 404 5.0023908 4.7641224 5
## 405 405 4.5618298 5.1184738 5
## 406 406 4.7550394 4.9441917 5
## 407 407 4.7763145 4.6512989 5
## 408 408 5.0783865 5.4003769 5
## 409 409 5.0655995 4.8273421 5
## 410 410 5.2608621 5.2467903 5
## 411 411 4.8916297 4.4663501 5
## 412 412 5.3116336 5.2716688 5
## 413 413 4.8995723 5.2050814 5
## 414 414 5.2024178 4.5739802 5
## 415 415 4.8221563 5.0714366 5
## 416 416 4.8676445 5.0584146 5
## 417 417 5.0932675 5.0877407 5
## 418 418 4.7602556 5.0493434 5
## 419 419 4.7298884 4.7414779 5
## 420 420 5.2205917 5.1662224 5
## 421 421 4.8877069 5.0124886 5
## 422 422 4.7700879 4.9376019 5
## 423 423 4.9098111 4.9191843 5
## 424 424 4.8271294 4.8663331 5
## 425 425 5.0847552 5.1179263 5
## 426 426 4.3802274 5.1305315 5
## 427 427 5.4411971 5.3068429 5
## 428 428 4.8435579 5.2774962 5
## 429 429 5.0255470 5.3882214 5
## 430 430 5.2277374 4.9545828 5
## 431 431 5.1579784 5.0488488 5
## 432 432 4.7694586 5.2445188 5
## 433 433 4.8090070 5.5523365 5
## 434 434 5.0644914 5.3426375 5
## 435 435 5.3879294 5.0977684 5
## 436 436 4.6587048 5.2273494 5
## 437 437 5.0713278 4.7908216 5
## 438 438 5.4761548 4.8248370 5
## 439 439 4.7681518 5.0183175 5
## 440 440 5.0139597 5.5861693 5
## 441 441 4.6097091 4.8949456 5
## 442 442 4.6728384 4.9600640 5
## 443 443 4.6685297 5.0280022 5
## 444 444 4.9063547 5.1587622 5
## 445 445 5.5939132 4.7610045 5
## 446 446 4.7176943 4.8838678 5
## 447 447 5.2771058 5.6967682 5
## 448 448 4.8712238 4.7407695 5
## 449 449 5.6537227 5.1934508 5
## 450 450 5.2763778 5.0025880 5
## 451 451 4.7780855 5.2877895 5
## 452 452 5.1183572 5.1296841 5
## 453 453 5.8577331 5.1192475 5
## 454 454 5.2747807 5.1126527 5
## 455 455 4.9210938 5.0188880 5
## 456 456 4.7925643 5.4408702 5
## 457 457 5.0880987 4.7718892 5
## 458 458 4.9638334 4.6413824 5
## 459 459 5.2045986 5.4291573 5
## 460 460 5.1928309 5.4566901 5
## 461 461 4.5885742 4.6579124 5
## 462 462 4.6955974 4.8846240 5
## 463 463 4.9758209 5.4765303 5
## 464 464 5.3771165 5.2072112 5
## 465 465 5.1722732 4.8891495 5
## 466 466 4.8913705 5.0382076 5
## 467 467 4.8732979 4.6625030 5
## 468 468 5.5956956 4.8497412 5
## 469 469 4.9823469 4.7980411 5
## 470 470 4.7359295 5.3876489 5
## 471 471 5.1715411 5.5381872 5
## 472 472 4.7551571 5.6016612 5
## 473 473 4.8647725 4.5283884 5
## 474 474 4.6424446 4.4978754 5
## 475 475 5.3232133 5.0866273 5
## 476 476 5.4520067 4.5584938 5
## 477 477 4.6397623 5.1159071 5
## 478 478 5.3549637 4.9362248 5
## 479 479 5.4418887 4.9147234 5
## 480 480 5.1208872 5.0612488 5
## 481 481 5.2072071 4.9552603 5
## 482 482 4.6556174 4.8887860 5
## 483 483 4.8571581 4.6189618 5
## 484 484 4.6487836 5.3243266 5
## 485 485 4.8955029 5.1528013 5
## 486 486 4.8517082 4.7854893 5
## 487 487 5.3641726 5.0651947 5
## 488 488 5.2241704 4.9324635 5
## 489 489 4.7790376 4.9088442 5
## 490 490 5.0846269 4.9624876 5
## 491 491 5.2265442 5.4841561 5
## 492 492 5.1360284 4.9758763 5
## 493 493 5.3695591 4.6951398 5
## 494 494 5.4136573 4.2168497 5
## 495 495 4.8261880 4.6525210 5
## 496 496 4.9125355 5.1383930 5
## 497 497 4.9937645 5.1260706 5
## 498 498 5.1068414 5.0851730 5
## 499 499 5.1397088 5.3040260 5
## 500 500 4.8450039 4.6849199 5
This is what our data looks like:
# Scatter plot of the simulated points (V1 vs. V2), colored by the cluster
# each point was generated from.
ggplot(data, mapping = aes(x = V1, y = V2, color = factor(true_clust))) +
  geom_point() +
  scale_color_discrete(name = "true cluster")
Now we can cluster it using kmeans++:
# Keep only the id column and the coordinate columns (names starting with
# "V"); this drops `true_clust` so the clustering does not see the labels.
data_for_clust <- select(data, id, starts_with("V"))

# Cluster into 5 groups using the euclidean metric. `verbose = TRUE` prints
# the seeding and iteration log shown below.
km <- TGL_kmeans_tidy(
  data_for_clust,
  k = 5,
  metric = "euclid",
  verbose = TRUE
)
## id column: id
## KMEans: will generate seeds
## KMeans into generate seeds
## at seed 0
## add new core from 23 to 0
## at seed 1
## done update min distance
## seed range 350 450
## picked up 411 dist was 3.00847
## add new core from 411 to 1
## at seed 2
## done update min distance
## seed range 300 400
## picked up 249 dist was 1.43524
## add new core from 249 to 2
## at seed 3
## done update min distance
## seed range 250 350
## picked up 323 dist was 0.742448
## add new core from 323 to 3
## at seed 4
## done update min distance
## seed range 200 300
## picked up 130 dist was 0.67049
## add new core from 130 to 4
## KMEans: reassign after init
## KMEans: iter 0
## KMEans: iter 1 changed 7
## KMEans: iter 1
## KMEans: iter 2 changed 3
## KMEans: iter 2
## KMEans: iter 3 changed 0
The returned list contains 3 fields:
## [1] "centers" "cluster" "size"
`km$centers` contains a tibble with a `clust` column and the cluster centers:
## # A tibble: 5 × 3
## clust V1 V2
## <int> <dbl> <dbl>
## 1 1 3.04 2.98
## 2 2 1.98 2.00
## 3 3 3.98 4.05
## 4 4 1.00 1.02
## 5 5 5.01 5.02
Clusters are numbered according to `reorder_func` (see the ‘Custom cluster ordering’ section).
`km$cluster` contains a tibble with an `id` column holding the observation id (`1:n` if no id column was supplied), and a `clust` column holding the cluster assigned to each observation:
## # A tibble: 500 × 2
## id clust
## <chr> <int>
## 1 1 4
## 2 2 4
## 3 3 4
## 4 4 4
## 5 5 4
## 6 6 4
## 7 7 4
## 8 8 4
## 9 9 4
## 10 10 4
## # … with 490 more rows
`km$size` contains a tibble with a `clust` column and an `n` column with the number of points in each cluster:
## # A tibble: 5 × 2
## clust n
## <int> <int>
## 1 1 104
## 2 2 100
## 3 3 95
## 4 4 100
## 5 5 101
We can now check our clustering performance, i.e. the fraction of observations that were classified correctly (note that the `match_clusters` function is internal to the package and is used only in this vignette):
# `match_clusters()` is an internal (non-exported) helper, hence the `:::`.
# The result is used below through its `true_clust` and `new_clust` columns.
d <- tglkmeans:::match_clusters(data, km, 5)

# Fraction of correctly classified observations: matching labels divided by
# the number of observations that have a non-missing `new_clust`.
with(d, sum(true_clust == new_clust, na.rm = TRUE) / sum(!is.na(new_clust)))
## [1] 0.982
And plot the results:
# Plot the clustering result: color encodes the assigned cluster and shape
# encodes the true cluster. The cluster centers are overlaid as large black
# "X" marks.
ggplot(
  d,
  mapping = aes(
    x = V1,
    y = V2,
    color = factor(new_clust),
    shape = factor(true_clust)
  )
) +
  geom_point() +
  scale_color_discrete(name = "cluster") +
  scale_shape_discrete(name = "true cluster") +
  geom_point(data = km$centers, size = 7, color = "black", shape = "X")
By default, the clusters are ordered using the following function: `hclust(dist(cor(t(centers))))` - hclust of the euclidean distance of the correlation matrix of the centers.
We can supply our own function to order the clusters using the `reorder_func` argument. The function is applied to each center and the clusters are ordered by the result.
# Re-run the clustering with a custom ordering function: `median` is passed
# as `reorder_func`, so it is applied to each center and the clusters are
# numbered by the result. Then print the reordered centers.
km <- TGL_kmeans_tidy(
  select(data, id, starts_with("V")),
  k = 5,
  metric = "euclid",
  verbose = FALSE,
  reorder_func = median
)
km$centers
## # A tibble: 5 × 3
## clust V1 V2
## <int> <dbl> <dbl>
## 1 1 0.999 1.01
## 2 2 1.97 2.00
## 3 3 3.04 2.98
## 4 4 3.98 4.05
## 5 5 5.01 5.02
tglkmeans can deal with missing data, as long as at least one dimension is not missing. For example:
## id V1 V2 true_clust
## 1 1 0.3767173 0.9473130 1
## 2 2 0.7771952 0.4365945 1
## 3 3 NA 1.3221844 1
## 4 4 NA 1.3151264 1
## 5 5 1.2724007 0.8288909 1
## 6 6 1.1475845 0.7452437 1
## 7 7 0.9039467 0.5520469 1
## 8 8 0.7665078 0.9444070 1
## 9 9 1.0253056 0.7001926 1
## 10 10 NA 0.7252489 1
## 11 11 1.0759952 1.0163106 1
## 12 12 NA 0.7865068 1
## 13 13 0.7875251 1.3497340 1
## 14 14 NA 0.9805805 1
## 15 15 NA 1.2208939 1
## 16 16 NA 1.9791167 1
## 17 17 1.0850682 1.4320827 1
## 18 18 0.9153876 0.9637182 1
## 19 19 1.7361625 1.0328868 1
## 20 20 1.0315178 1.1156138 1
## 21 21 NA 1.1313384 1
## 22 22 1.3445547 1.2253213 1
## 23 23 1.1422975 1.0645733 1
## 24 24 1.2113962 0.8812399 1
## 25 25 1.1844728 1.0250569 1
## 26 26 0.2466337 1.4776891 1
## 27 27 1.6871284 1.2548285 1
## 28 28 0.8059647 1.0578151 1
## 29 29 0.7271369 0.5993674 1
## 30 30 NA 0.7155586 1
## 31 31 1.1210407 1.8089211 1
## 32 32 NA 1.1504575 1
## 33 33 0.8216845 1.2357720 1
## 34 34 1.4522292 0.7543033 1
## 35 35 NA 0.4371217 1
## 36 36 1.0334194 0.5317554 1
## 37 37 0.9241629 1.2746520 1
## 38 38 0.8943483 0.8673550 1
## 39 39 1.0867575 1.3316706 1
## 40 40 0.5684744 1.0766575 1
## 41 41 0.9817616 1.2313768 1
## 42 42 0.6929054 1.2364633 1
## 43 43 NA 1.0941386 1
## 44 44 1.6692845 1.1624366 1
## 45 45 0.9772522 1.1011560 1
## 46 46 0.5720378 1.2177179 1
## 47 47 NA 1.1485001 1
## 48 48 0.4087713 1.0886696 1
## 49 49 0.7916948 1.3920948 1
## 50 50 0.4612196 1.1451793 1
## 51 51 1.0086479 1.0011060 1
## 52 52 1.0070108 0.9573810 1
## 53 53 1.2425391 1.0304969 1
## 54 54 1.3535816 0.7065712 1
## 55 55 NA 0.4952795 1
## 56 56 NA 0.9660392 1
## 57 57 1.3371796 0.8339075 1
## 58 58 NA 0.4536824 1
## 59 59 0.7775405 1.1490041 1
## 60 60 NA 1.0720482 1
## 61 61 0.7466844 0.8542125 1
## 62 62 NA 1.5219069 1
## 63 63 0.8979006 0.6467794 1
## 64 64 1.2014007 1.0265304 1
## 65 65 NA 1.2432242 1
## 66 66 0.7744559 1.0709800 1
## 67 67 0.8238785 0.8478476 1
## 68 68 0.9416013 0.7349223 1
## 69 69 0.9033257 0.8141168 1
## 70 70 0.4424237 0.5838100 1
## 71 71 0.8096776 1.2962844 1
## 72 72 1.3233941 1.0144875 1
## 73 73 0.8031583 1.6319976 1
## 74 74 NA 1.1116487 1
## 75 75 1.1837934 0.8682858 1
## 76 76 1.6209222 0.3178315 1
## 77 77 NA 0.9164385 1
## 78 78 0.8954044 1.4752830 1
## 79 79 0.6614979 0.6881379 1
## 80 80 0.9894612 1.0304082 1
## 81 81 NA 0.8205572 1
## 82 82 1.2505716 1.0873547 1
## 83 83 1.1761018 1.0517744 1
## 84 84 1.5851996 0.9387091 1
## 85 85 0.9956294 0.8673277 1
## 86 86 1.2387546 1.2683582 1
## 87 87 0.6537069 0.7549864 1
## 88 88 1.3048125 0.6787272 1
## 89 89 0.9281717 0.9858927 1
## 90 90 1.3200650 0.9758446 1
## 91 91 1.4952188 1.0709705 1
## 92 92 1.1864362 0.9516226 1
## 93 93 NA 1.5406666 1
## 94 94 0.9871847 1.0048342 1
## 95 95 0.8256279 1.0609859 1
## 96 96 1.3328611 0.9271259 1
## 97 97 1.3349069 1.0202728 1
## 98 98 0.7626447 1.1762738 1
## 99 99 0.7441846 0.8901470 1
## 100 100 NA 1.4761237 1
## 101 101 NA 2.1917667 2
## 102 102 2.1477589 2.2146340 2
## 103 103 1.7027469 2.0213280 2
## 104 104 2.1463023 2.0638567 2
## 105 105 2.4073865 2.2785103 2
## 106 106 2.1025445 1.3996684 2
## 107 107 2.3094964 1.8137042 2
## 108 108 NA 1.8284215 2
## 109 109 1.2628418 1.7275650 2
## 110 110 1.8729456 1.7514444 2
## 111 111 2.2711728 2.0103012 2
## 112 112 1.8110873 2.0544719 2
## 113 113 1.4373040 2.5330542 2
## 114 114 NA 2.5732635 2
## 115 115 NA 1.3811372 2
## 116 116 1.7594145 1.9725059 2
## 117 117 2.1250675 2.3913515 2
## 118 118 2.2114631 1.8171498 2
## 119 119 1.9167138 2.0340057 2
## 120 120 1.7994075 1.8193712 2
## 121 121 1.8916990 2.0483636 2
## 122 122 1.7157171 2.3094967 2
## 123 123 1.7790713 2.4263179 2
## 124 124 2.2918029 1.0859611 2
## 125 125 1.9785292 2.3038684 2
## 126 126 2.0330563 1.6736483 2
## 127 127 2.8201623 2.2324056 2
## 128 128 2.0486865 1.7985402 2
## 129 129 1.5673919 1.5444837 2
## 130 130 2.2475204 1.3704727 2
## 131 131 2.1664586 2.1267505 2
## 132 132 2.1036168 1.8198495 2
## 133 133 NA 1.6671382 2
## 134 134 1.9945100 1.9456569 2
## 135 135 1.8337013 2.0862003 2
## 136 136 2.3137056 1.6447256 2
## 137 137 1.5789334 1.4890468 2
## 138 138 2.2722327 2.0891428 2
## 139 139 2.3241516 2.0692744 2
## 140 140 2.1932210 2.1691581 2
## 141 141 NA 1.7113753 2
## 142 142 2.6543953 2.2280597 2
## 143 143 NA 1.8805143 2
## 144 144 1.9022112 1.8386501 2
## 145 145 2.0550513 2.2096079 2
## 146 146 1.8822028 2.2294607 2
## 147 147 2.0534039 2.6261139 2
## 148 148 2.1858917 2.5393751 2
## 149 149 2.2581082 1.6637601 2
## 150 150 NA 2.0051653 2
## 151 151 2.0608000 1.7146325 2
## 152 152 NA 2.0572412 2
## 153 153 1.7489826 1.7053830 2
## 154 154 1.9614406 2.0093789 2
## 155 155 NA 2.1460354 2
## 156 156 2.1578004 1.3326680 2
## 157 157 1.8280531 1.9106273 2
## 158 158 2.1618254 2.2173712 2
## 159 159 2.4764937 2.3472773 2
## 160 160 1.5468318 2.3676396 2
## 161 161 1.8788354 2.4492316 2
## 162 162 1.6990724 2.0442601 2
## 163 163 2.2534504 1.9472414 2
## 164 164 2.1236685 2.1272375 2
## 165 165 2.2178681 2.6039467 2
## 166 166 1.9941017 1.8509873 2
## 167 167 1.8064223 1.8947630 2
## 168 168 2.1018646 1.5397721 2
## 169 169 2.1232171 2.1360815 2
## 170 170 NA 2.1201713 2
## 171 171 1.6443690 1.9995858 2
## 172 172 1.6683686 2.5353654 2
## 173 173 1.4272309 1.9921655 2
## 174 174 NA 2.3628861 2
## 175 175 NA 2.5421528 2
## 176 176 2.0555251 1.4521124 2
## 177 177 1.6892600 1.4476947 2
## 178 178 1.6147121 2.0013555 2
## 179 179 1.3534581 1.8987578 2
## 180 180 2.3528521 1.9627606 2
## 181 181 1.6662967 2.0770224 2
## 182 182 2.3474273 1.4765016 2
## 183 183 1.5646400 1.5292227 2
## 184 184 NA 1.8891854 2
## 185 185 NA 2.0247285 2
## 186 186 2.0877701 1.9065968 2
## 187 187 2.2941633 1.8008229 2
## 188 188 2.3978545 2.1953938 2
## 189 189 2.0319868 2.5153169 2
## 190 190 1.7046119 1.7643745 2
## 191 191 NA 1.6632780 2
## 192 192 2.0982337 2.1636975 2
## 193 193 2.5543183 2.2031899 2
## 194 194 NA 2.7916297 2
## 195 195 NA 1.7120612 2
## 196 196 1.8446500 2.1530686 2
## 197 197 1.7581656 2.3675160 2
## 198 198 1.7663722 2.1299673 2
## 199 199 2.1242548 1.8132569 2
## 200 200 2.0498030 2.2659930 2
## 201 201 3.4014600 3.5878311 3
## 202 202 NA 2.8851439 3
## 203 203 3.3119046 3.4016763 3
## 204 204 3.2398718 3.1279438 3
## 205 205 2.9069991 3.1139049 3
## 206 206 3.6286900 3.1263548 3
## 207 207 3.0073556 2.9962502 3
## 208 208 3.1202868 3.1757186 3
## 209 209 NA 2.6987379 3
## 210 210 2.9299413 2.9252239 3
## 211 211 2.5500608 2.8270395 3
## 212 212 3.1046580 2.7386459 3
## 213 213 3.4140302 2.9988731 3
## 214 214 2.8560988 3.3510093 3
## 215 215 NA 2.5843607 3
## 216 216 3.0761561 2.6183031 3
## 217 217 3.4324299 2.6947791 3
## 218 218 2.5208764 2.9306676 3
## 219 219 NA 2.6676517 3
## 220 220 2.8867122 2.7539662 3
## 221 221 NA 2.8754211 3
## 222 222 2.6780094 3.2825047 3
## 223 223 NA 2.6799331 3
## 224 224 2.8211712 3.2369790 3
## 225 225 2.7514521 2.6151687 3
## 226 226 3.0444776 2.6842558 3
## 227 227 3.1433062 3.1563466 3
## 228 228 2.7150084 2.9706850 3
## 229 229 3.0973033 2.7993101 3
## 230 230 2.8818278 2.8024633 3
## 231 231 3.0127776 2.9495391 3
## 232 232 2.9490148 3.0095152 3
## 233 233 3.1147094 3.2221070 3
## 234 234 3.0645069 3.1312490 3
## 235 235 2.9944222 3.0712954 3
## 236 236 3.3457507 3.3244889 3
## 237 237 2.8515466 2.4184105 3
## 238 238 3.0735926 3.1414431 3
## 239 239 3.2692307 3.0904445 3
## 240 240 2.8705738 3.2510183 3
## 241 241 3.2456386 2.6809704 3
## 242 242 NA 3.1657808 3
## 243 243 2.4598513 2.8037340 3
## 244 244 3.0575639 2.7395245 3
## 245 245 3.0764313 3.4775483 3
## 246 246 3.1290985 3.0854845 3
## 247 247 3.2054891 3.3187393 3
## 248 248 3.0558586 2.7255177 3
## 249 249 NA 2.5304063 3
## 250 250 3.0308103 3.2224918 3
## 251 251 3.2480612 3.3634026 3
## 252 252 3.1948875 3.3226132 3
## 253 253 NA 2.6371873 3
## 254 254 2.9311233 2.6060601 3
## 255 255 2.8559950 3.2117923 3
## 256 256 3.1819779 3.1301060 3
## 257 257 3.1573751 2.7412442 3
## 258 258 3.1382019 2.7441654 3
## 259 259 2.9298448 2.9606736 3
## 260 260 2.9816983 2.7416725 3
## 261 261 3.2356440 2.2047582 3
## 262 262 NA 2.9847514 3
## 263 263 2.9328276 3.1378569 3
## 264 264 NA 2.9081859 3
## 265 265 2.6228130 2.6790083 3
## 266 266 NA 3.2467586 3
## 267 267 2.2097800 2.9995353 3
## 268 268 NA 2.4982152 3
## 269 269 2.4058656 2.3843438 3
## 270 270 2.7437235 3.2251702 3
## 271 271 3.1755176 2.6126634 3
## 272 272 3.4758594 3.4672082 3
## 273 273 3.2354723 2.7545875 3
## 274 274 3.1234780 3.0544978 3
## 275 275 NA 2.5237407 3
## 276 276 2.7216100 2.5560206 3
## 277 277 NA 3.0570732 3
## 278 278 2.7876514 2.8955042 3
## 279 279 2.9982702 3.7034991 3
## 280 280 2.7697049 3.1549488 3
## 281 281 3.3192365 2.9823753 3
## 282 282 2.9201597 3.0979474 3
## 283 283 3.5827237 3.1819142 3
## 284 284 3.0160687 2.5572207 3
## 285 285 NA 2.8007284 3
## 286 286 NA 3.2508303 3
## 287 287 NA 2.8462358 3
## 288 288 3.3275874 2.9343699 3
## 289 289 2.9847950 3.6038591 3
## 290 290 2.6611595 2.8693601 3
## 291 291 2.9556805 2.9651621 3
## 292 292 3.0763826 3.0402241 3
## 293 293 2.8864817 3.0387177 3
## 294 294 3.0610897 3.4376058 3
## 295 295 3.3142024 2.9601696 3
## 296 296 2.3653202 3.7197664 3
## 297 297 2.9514580 2.9151938 3
## 298 298 3.2201968 2.6798721 3
## 299 299 3.0463166 2.6714989 3
## 300 300 3.1729396 3.0031066 3
## 301 301 5.0658976 4.1118427 4
## 302 302 4.3960411 4.4339442 4
## 303 303 4.1868534 4.1215591 4
## 304 304 4.1132864 3.8713248 4
## 305 305 3.8033336 4.0169877 4
## 306 306 4.1373131 3.7863779 4
## 307 307 3.4601896 4.3591492 4
## 308 308 4.4199619 3.8747844 4
## 309 309 3.7011369 4.0371309 4
## 310 310 NA 3.6505311 4
## 311 311 3.8187098 3.8243625 4
## 312 312 3.5109187 3.4985160 4
## 313 313 3.6193299 3.8202263 4
## 314 314 4.2024148 4.0312200 4
## 315 315 3.7779494 3.7427711 4
## 316 316 3.7654909 4.2355863 4
## 317 317 NA 3.7456796 4
## 318 318 3.4877721 4.0340104 4
## 319 319 NA 3.9333184 4
## 320 320 NA 4.3381556 4
## 321 321 3.6800416 3.9583730 4
## 322 322 4.2599479 3.6184553 4
## 323 323 3.6916683 4.2870491 4
## 324 324 NA 3.9259953 4
## 325 325 3.6510664 4.2109721 4
## 326 326 3.7882583 3.9926424 4
## 327 327 4.1631527 4.3584797 4
## 328 328 4.2291454 4.1269821 4
## 329 329 4.1560941 3.9657191 4
## 330 330 3.9100256 4.2951495 4
## 331 331 NA 3.8895305 4
## 332 332 3.8239899 4.1661117 4
## 333 333 3.3083151 3.4777662 4
## 334 334 4.3415230 4.1187053 4
## 335 335 3.9312143 4.0712408 4
## 336 336 3.8007796 3.8484348 4
## 337 337 NA 3.4701330 4
## 338 338 3.2766816 3.9932455 4
## 339 339 3.7546550 4.6039884 4
## 340 340 3.6065517 4.3826314 4
## 341 341 4.4218779 3.9231241 4
## 342 342 NA 4.1839227 4
## 343 343 4.0421375 4.1250092 4
## 344 344 4.0905590 3.6298293 4
## 345 345 3.7393145 4.0518412 4
## 346 346 3.6922708 4.0118505 4
## 347 347 3.2629506 3.6010818 4
## 348 348 3.9450430 3.6383833 4
## 349 349 NA 3.9902396 4
## 350 350 3.8335546 4.0751249 4
## 351 351 3.0365398 3.9707823 4
## 352 352 3.5990194 4.5702123 4
## 353 353 4.0892900 3.6139801 4
## 354 354 3.9864916 3.8627303 4
## 355 355 4.0131637 3.8038534 4
## 356 356 4.0252756 4.4329086 4
## 357 357 4.4190276 3.7876161 4
## 358 358 4.1748940 4.2161852 4
## 359 359 3.6860696 3.7491322 4
## 360 360 NA 3.8821676 4
## 361 361 3.6214095 3.9572185 4
## 362 362 3.7729619 4.2351023 4
## 363 363 4.0053699 3.9977893 4
## 364 364 4.2968411 4.1106944 4
## 365 365 4.3472787 3.9834814 4
## 366 366 4.2006914 3.7829213 4
## 367 367 3.9609321 4.5616079 4
## 368 368 4.0589061 4.1490892 4
## 369 369 4.3897579 4.0635033 4
## 370 370 3.8204380 3.5928239 4
## 371 371 4.3655069 4.4265610 4
## 372 372 4.0523961 4.0961256 4
## 373 373 3.6651531 4.1207548 4
## 374 374 3.8623737 4.0098867 4
## 375 375 3.7550230 3.4420118 4
## 376 376 NA 4.5335275 4
## 377 377 NA 3.9644200 4
## 378 378 NA 3.9272499 4
## 379 379 3.5483790 4.6100401 4
## 380 380 NA 3.9796773 4
## 381 381 4.1418682 4.3316494 4
## 382 382 4.1430704 3.8302312 4
## 383 383 4.4167253 4.2131191 4
## 384 384 4.2517418 3.8564708 4
## 385 385 4.2964473 4.3320663 4
## 386 386 3.9881072 4.0797791 4
## 387 387 4.0790616 3.9764928 4
## 388 388 4.2799163 4.5571738 4
## 389 389 3.8732943 4.6022994 4
## 390 390 3.8629210 4.2231740 4
## 391 391 NA 4.5828260 4
## 392 392 3.2886044 3.0858300 4
## 393 393 3.8766071 4.4275967 4
## 394 394 4.1774254 3.9102585 4
## 395 395 NA 4.2649693 4
## 396 396 NA 3.8195696 4
## 397 397 4.4789976 4.0482910 4
## 398 398 3.9797508 3.6611005 4
## 399 399 NA 3.9675443 4
## 400 400 4.1275862 3.4571791 4
## 401 401 4.9528079 5.2055450 5
## 402 402 5.0878482 5.0924140 5
## 403 403 4.6741911 5.1163494 5
## 404 404 NA 4.7641224 5
## 405 405 NA 5.1184738 5
## 406 406 4.7550394 4.9441917 5
## 407 407 4.7763145 4.6512989 5
## 408 408 5.0783865 5.4003769 5
## 409 409 5.0655995 4.8273421 5
## 410 410 5.2608621 5.2467903 5
## 411 411 4.8916297 4.4663501 5
## 412 412 5.3116336 5.2716688 5
## 413 413 NA 5.2050814 5
## 414 414 5.2024178 4.5739802 5
## 415 415 4.8221563 5.0714366 5
## 416 416 NA 5.0584146 5
## 417 417 5.0932675 5.0877407 5
## 418 418 4.7602556 5.0493434 5
## 419 419 4.7298884 4.7414779 5
## 420 420 5.2205917 5.1662224 5
## 421 421 4.8877069 5.0124886 5
## 422 422 NA 4.9376019 5
## 423 423 4.9098111 4.9191843 5
## 424 424 4.8271294 4.8663331 5
## 425 425 5.0847552 5.1179263 5
## 426 426 4.3802274 5.1305315 5
## 427 427 5.4411971 5.3068429 5
## 428 428 4.8435579 5.2774962 5
## 429 429 5.0255470 5.3882214 5
## 430 430 NA 4.9545828 5
## 431 431 5.1579784 5.0488488 5
## 432 432 4.7694586 5.2445188 5
## 433 433 4.8090070 5.5523365 5
## 434 434 5.0644914 5.3426375 5
## 435 435 5.3879294 5.0977684 5
## 436 436 NA 5.2273494 5
## 437 437 5.0713278 4.7908216 5
## 438 438 5.4761548 4.8248370 5
## 439 439 NA 5.0183175 5
## 440 440 5.0139597 5.5861693 5
## 441 441 NA 4.8949456 5
## 442 442 NA 4.9600640 5
## 443 443 4.6685297 5.0280022 5
## 444 444 4.9063547 5.1587622 5
## 445 445 NA 4.7610045 5
## 446 446 NA 4.8838678 5
## 447 447 NA 5.6967682 5
## 448 448 4.8712238 4.7407695 5
## 449 449 5.6537227 5.1934508 5
## 450 450 5.2763778 5.0025880 5
## 451 451 4.7780855 5.2877895 5
## 452 452 5.1183572 5.1296841 5
## 453 453 5.8577331 5.1192475 5
## 454 454 NA 5.1126527 5
## 455 455 4.9210938 5.0188880 5
## 456 456 4.7925643 5.4408702 5
## 457 457 5.0880987 4.7718892 5
## 458 458 4.9638334 4.6413824 5
## 459 459 5.2045986 5.4291573 5
## 460 460 5.1928309 5.4566901 5
## 461 461 4.5885742 4.6579124 5
## 462 462 4.6955974 4.8846240 5
## 463 463 NA 5.4765303 5
## 464 464 NA 5.2072112 5
## 465 465 5.1722732 4.8891495 5
## 466 466 4.8913705 5.0382076 5
## 467 467 4.8732979 4.6625030 5
## 468 468 5.5956956 4.8497412 5
## 469 469 4.9823469 4.7980411 5
## 470 470 NA 5.3876489 5
## 471 471 NA 5.5381872 5
## 472 472 NA 5.6016612 5
## 473 473 4.8647725 4.5283884 5
## 474 474 NA 4.4978754 5
## 475 475 5.3232133 5.0866273 5
## 476 476 5.4520067 4.5584938 5
## 477 477 4.6397623 5.1159071 5
## 478 478 5.3549637 4.9362248 5
## 479 479 5.4418887 4.9147234 5
## 480 480 5.1208872 5.0612488 5
## 481 481 5.2072071 4.9552603 5
## 482 482 4.6556174 4.8887860 5
## 483 483 4.8571581 4.6189618 5
## 484 484 4.6487836 5.3243266 5
## 485 485 NA 5.1528013 5
## 486 486 4.8517082 4.7854893 5
## 487 487 5.3641726 5.0651947 5
## 488 488 5.2241704 4.9324635 5
## 489 489 4.7790376 4.9088442 5
## 490 490 5.0846269 4.9624876 5
## 491 491 5.2265442 5.4841561 5
## 492 492 5.1360284 4.9758763 5
## 493 493 5.3695591 4.6951398 5
## 494 494 5.4136573 4.2168497 5
## 495 495 4.8261880 4.6525210 5
## 496 496 4.9125355 5.1383930 5
## 497 497 4.9937645 5.1260706 5
## 498 498 5.1068414 5.0851730 5
## 499 499 NA 5.3040260 5
## 500 500 4.8450039 4.6849199 5
# Cluster the points into k = 5 groups with the Euclidean metric. Only the
# id column and the V* feature columns are passed to the clusterer.
km <- data %>%
  select(id, starts_with("V")) %>%
  TGL_kmeans_tidy(
    k = 5,
    metric = "euclid",
    verbose = FALSE
  )

# match_clusters() is an internal helper (hence `:::`). Its result exposes both
# `true_clust` and `new_clust`; presumably the new labels are aligned to the
# true ones -- TODO confirm against the package source.
d <- tglkmeans:::match_clusters(data, km, 5)

# Accuracy: number of matching labels divided by the number of points that
# actually received a cluster (rows with NA `new_clust` are excluded).
with(d, sum(true_clust == new_clust, na.rm = TRUE) / sum(!is.na(new_clust)))
## [1] 0.962
Plotting the results (without the NAs), we get:
# Scatter plot of the first two features: color encodes the assigned cluster,
# point shape encodes the true cluster. The cluster centers are drawn on top
# as large black "X" marks.
ggplot(
  d,
  aes(
    x = V1,
    y = V2,
    color = factor(new_clust),
    shape = factor(true_clust)
  )
) +
  geom_point() +
  scale_color_discrete(name = "cluster") +
  scale_shape_discrete(name = "true cluster") +
  geom_point(data = km$centers, size = 7, color = "black", shape = "X")
## Warning: Removed 100 rows containing missing values (`geom_point()`).
Let’s move to higher dimensions (and higher noise):
# Simulate a harder problem: 30 clusters in 300 dimensions.
data <- simulate_data(n = 100, sd = 0.3, nclust = 30, dims = 300)

# Cluster with k = 30 on the id column plus the V* feature columns.
km <- data %>%
  select(id, starts_with("V")) %>%
  TGL_kmeans_tidy(
    k = 30,
    metric = "euclid",
    verbose = FALSE
  )

# Pair the new assignment with the true labels (internal helper, hence `:::`).
d <- tglkmeans:::match_clusters(data, km, 30)

# Accuracy over the points that received a cluster.
with(d, sum(true_clust == new_clust, na.rm = TRUE) / sum(!is.na(new_clust)))
## [1] 1
Let’s compare it to R vanilla kmeans:
# Baseline: base R kmeans() on the feature columns only, asking for 30 centers.
km_standard <- kmeans(data %>% select(starts_with("V")), centers = 30)

# Attach a `clust` table (id + assigned cluster) to the kmeans result before
# handing it to match_clusters() -- presumably the shape that helper expects,
# mirroring the tglkmeans result; TODO confirm.
# The ids are taken from `data$id` instead of `1:nrow(data)`: kmeans() returns
# assignments in row order, so this keeps each assignment tied to the real id
# of its row and avoids the `1:n` pitfall on empty input.
km_standard$clust <- tibble(id = data$id, clust = km_standard$cluster)

# Pair the baseline assignment with the true labels (internal helper).
d <- tglkmeans:::match_clusters(data, km_standard, 30)

# Accuracy over the points that received a cluster.
sum(d$true_clust == d$new_clust, na.rm = TRUE) / sum(!is.na(d$new_clust))
## [1] 0.6818182
We can see that kmeans++ clusters significantly better than R vanilla kmeans.
We can set the seed for the C++ random number generator to get reproducible results:
# Run the same clustering twice with an identical seed.
km1 <- data %>%
  select(id, starts_with("V")) %>%
  TGL_kmeans_tidy(
    k = 30,
    metric = "euclid",
    verbose = FALSE,
    seed = 60427
  )

km2 <- data %>%
  select(id, starts_with("V")) %>%
  TGL_kmeans_tidy(
    k = 30,
    metric = "euclid",
    verbose = FALSE,
    seed = 60427
  )

# Exact (not tolerance-based) comparison on purpose: with the same seed the two
# runs are expected to yield identical centers. The first column is dropped
# before comparing.
all(km1$centers[, -1] == km2$centers[, -1])
## [1] TRUE