basic usage

Aviezer Lifshitz

2023-03-21

Basic usage of the package.

Basic usage

First, let’s simulate 5 clusters of 100 points each, normally distributed around the centers 1 to 5 with a standard deviation of 0.3:

data <- simulate_data(n = 100, sd = 0.3, nclust = 5, dims = 2)
data
##      id        V1        V2 true_clust
## 1     1 0.3767173 0.9473130          1
## 2     2 0.7771952 0.4365945          1
## 3     3 1.2677965 1.3221844          1
## 4     4 0.8669975 1.3151264          1
## 5     5 1.2724007 0.8288909          1
## 6     6 1.1475845 0.7452437          1
## 7     7 0.9039467 0.5520469          1
## 8     8 0.7665078 0.9444070          1
## 9     9 1.0253056 0.7001926          1
## 10   10 0.8420768 0.7252489          1
## 11   11 1.0759952 1.0163106          1
## 12   12 0.7525193 0.7865068          1
## 13   13 0.7875251 1.3497340          1
## 14   14 0.6228121 0.9805805          1
## 15   15 0.6944935 1.2208939          1
## 16   16 1.2291597 1.9791167          1
## 17   17 1.0850682 1.4320827          1
## 18   18 0.9153876 0.9637182          1
## 19   19 1.7361625 1.0328868          1
## 20   20 1.0315178 1.1156138          1
## 21   21 0.8028178 1.1313384          1
## 22   22 1.3445547 1.2253213          1
## 23   23 1.1422975 1.0645733          1
## 24   24 1.2113962 0.8812399          1
## 25   25 1.1844728 1.0250569          1
## 26   26 0.2466337 1.4776891          1
## 27   27 1.6871284 1.2548285          1
## 28   28 0.8059647 1.0578151          1
## 29   29 0.7271369 0.5993674          1
## 30   30 1.0879096 0.7155586          1
## 31   31 1.1210407 1.8089211          1
## 32   32 1.2089941 1.1504575          1
## 33   33 0.8216845 1.2357720          1
## 34   34 1.4522292 0.7543033          1
## 35   35 1.5830919 0.4371217          1
## 36   36 1.0334194 0.5317554          1
## 37   37 0.9241629 1.2746520          1
## 38   38 0.8943483 0.8673550          1
## 39   39 1.0867575 1.3316706          1
## 40   40 0.5684744 1.0766575          1
## 41   41 0.9817616 1.2313768          1
## 42   42 0.6929054 1.2364633          1
## 43   43 0.5943770 1.0941386          1
## 44   44 1.6692845 1.1624366          1
## 45   45 0.9772522 1.1011560          1
## 46   46 0.5720378 1.2177179          1
## 47   47 0.9264959 1.1485001          1
## 48   48 0.4087713 1.0886696          1
## 49   49 0.7916948 1.3920948          1
## 50   50 0.4612196 1.1451793          1
## 51   51 1.0086479 1.0011060          1
## 52   52 1.0070108 0.9573810          1
## 53   53 1.2425391 1.0304969          1
## 54   54 1.3535816 0.7065712          1
## 55   55 1.4744849 0.4952795          1
## 56   56 1.2229512 0.9660392          1
## 57   57 1.3371796 0.8339075          1
## 58   58 1.0247336 0.4536824          1
## 59   59 0.7775405 1.1490041          1
## 60   60 1.1150210 1.0720482          1
## 61   61 0.7466844 0.8542125          1
## 62   62 1.1955397 1.5219069          1
## 63   63 0.8979006 0.6467794          1
## 64   64 1.2014007 1.0265304          1
## 65   65 0.1574428 1.2432242          1
## 66   66 0.7744559 1.0709800          1
## 67   67 0.8238785 0.8478476          1
## 68   68 0.9416013 0.7349223          1
## 69   69 0.9033257 0.8141168          1
## 70   70 0.4424237 0.5838100          1
## 71   71 0.8096776 1.2962844          1
## 72   72 1.3233941 1.0144875          1
## 73   73 0.8031583 1.6319976          1
## 74   74 1.3320322 1.1116487          1
## 75   75 1.1837934 0.8682858          1
## 76   76 1.6209222 0.3178315          1
## 77   77 0.9031032 0.9164385          1
## 78   78 0.8954044 1.4752830          1
## 79   79 0.6614979 0.6881379          1
## 80   80 0.9894612 1.0304082          1
## 81   81 0.6459521 0.8205572          1
## 82   82 1.2505716 1.0873547          1
## 83   83 1.1761018 1.0517744          1
## 84   84 1.5851996 0.9387091          1
## 85   85 0.9956294 0.8673277          1
## 86   86 1.2387546 1.2683582          1
## 87   87 0.6537069 0.7549864          1
## 88   88 1.3048125 0.6787272          1
## 89   89 0.9281717 0.9858927          1
## 90   90 1.3200650 0.9758446          1
## 91   91 1.4952188 1.0709705          1
## 92   92 1.1864362 0.9516226          1
## 93   93 0.9691448 1.5406666          1
## 94   94 0.9871847 1.0048342          1
## 95   95 0.8256279 1.0609859          1
## 96   96 1.3328611 0.9271259          1
## 97   97 1.3349069 1.0202728          1
## 98   98 0.7626447 1.1762738          1
## 99   99 0.7441846 0.8901470          1
## 100 100 1.0280014 1.4761237          1
## 101 101 1.7240498 2.1917667          2
## 102 102 2.1477589 2.2146340          2
## 103 103 1.7027469 2.0213280          2
## 104 104 2.1463023 2.0638567          2
## 105 105 2.4073865 2.2785103          2
## 106 106 2.1025445 1.3996684          2
## 107 107 2.3094964 1.8137042          2
## 108 108 1.5054438 1.8284215          2
## 109 109 1.2628418 1.7275650          2
## 110 110 1.8729456 1.7514444          2
## 111 111 2.2711728 2.0103012          2
## 112 112 1.8110873 2.0544719          2
## 113 113 1.4373040 2.5330542          2
## 114 114 2.3472602 2.5732635          2
## 115 115 1.8571581 1.3811372          2
## 116 116 1.7594145 1.9725059          2
## 117 117 2.1250675 2.3913515          2
## 118 118 2.2114631 1.8171498          2
## 119 119 1.9167138 2.0340057          2
## 120 120 1.7994075 1.8193712          2
## 121 121 1.8916990 2.0483636          2
## 122 122 1.7157171 2.3094967          2
## 123 123 1.7790713 2.4263179          2
## 124 124 2.2918029 1.0859611          2
## 125 125 1.9785292 2.3038684          2
## 126 126 2.0330563 1.6736483          2
## 127 127 2.8201623 2.2324056          2
## 128 128 2.0486865 1.7985402          2
## 129 129 1.5673919 1.5444837          2
## 130 130 2.2475204 1.3704727          2
## 131 131 2.1664586 2.1267505          2
## 132 132 2.1036168 1.8198495          2
## 133 133 2.0654851 1.6671382          2
## 134 134 1.9945100 1.9456569          2
## 135 135 1.8337013 2.0862003          2
## 136 136 2.3137056 1.6447256          2
## 137 137 1.5789334 1.4890468          2
## 138 138 2.2722327 2.0891428          2
## 139 139 2.3241516 2.0692744          2
## 140 140 2.1932210 2.1691581          2
## 141 141 2.0636184 1.7113753          2
## 142 142 2.6543953 2.2280597          2
## 143 143 1.9986698 1.8805143          2
## 144 144 1.9022112 1.8386501          2
## 145 145 2.0550513 2.2096079          2
## 146 146 1.8822028 2.2294607          2
## 147 147 2.0534039 2.6261139          2
## 148 148 2.1858917 2.5393751          2
## 149 149 2.2581082 1.6637601          2
## 150 150 1.6671152 2.0051653          2
## 151 151 2.0608000 1.7146325          2
## 152 152 2.1933840 2.0572412          2
## 153 153 1.7489826 1.7053830          2
## 154 154 1.9614406 2.0093789          2
## 155 155 2.4251870 2.1460354          2
## 156 156 2.1578004 1.3326680          2
## 157 157 1.8280531 1.9106273          2
## 158 158 2.1618254 2.2173712          2
## 159 159 2.4764937 2.3472773          2
## 160 160 1.5468318 2.3676396          2
## 161 161 1.8788354 2.4492316          2
## 162 162 1.6990724 2.0442601          2
## 163 163 2.2534504 1.9472414          2
## 164 164 2.1236685 2.1272375          2
## 165 165 2.2178681 2.6039467          2
## 166 166 1.9941017 1.8509873          2
## 167 167 1.8064223 1.8947630          2
## 168 168 2.1018646 1.5397721          2
## 169 169 2.1232171 2.1360815          2
## 170 170 2.0092199 2.1201713          2
## 171 171 1.6443690 1.9995858          2
## 172 172 1.6683686 2.5353654          2
## 173 173 1.4272309 1.9921655          2
## 174 174 1.3661323 2.3628861          2
## 175 175 2.0740349 2.5421528          2
## 176 176 2.0555251 1.4521124          2
## 177 177 1.6892600 1.4476947          2
## 178 178 1.6147121 2.0013555          2
## 179 179 1.3534581 1.8987578          2
## 180 180 2.3528521 1.9627606          2
## 181 181 1.6662967 2.0770224          2
## 182 182 2.3474273 1.4765016          2
## 183 183 1.5646400 1.5292227          2
## 184 184 1.9124392 1.8891854          2
## 185 185 1.9598714 2.0247285          2
## 186 186 2.0877701 1.9065968          2
## 187 187 2.2941633 1.8008229          2
## 188 188 2.3978545 2.1953938          2
## 189 189 2.0319868 2.5153169          2
## 190 190 1.7046119 1.7643745          2
## 191 191 1.6138429 1.6632780          2
## 192 192 2.0982337 2.1636975          2
## 193 193 2.5543183 2.2031899          2
## 194 194 1.8657742 2.7916297          2
## 195 195 2.0088602 1.7120612          2
## 196 196 1.8446500 2.1530686          2
## 197 197 1.7581656 2.3675160          2
## 198 198 1.7663722 2.1299673          2
## 199 199 2.1242548 1.8132569          2
## 200 200 2.0498030 2.2659930          2
## 201 201 3.4014600 3.5878311          3
## 202 202 3.6374637 2.8851439          3
## 203 203 3.3119046 3.4016763          3
## 204 204 3.2398718 3.1279438          3
## 205 205 2.9069991 3.1139049          3
## 206 206 3.6286900 3.1263548          3
## 207 207 3.0073556 2.9962502          3
## 208 208 3.1202868 3.1757186          3
## 209 209 3.1378234 2.6987379          3
## 210 210 2.9299413 2.9252239          3
## 211 211 2.5500608 2.8270395          3
## 212 212 3.1046580 2.7386459          3
## 213 213 3.4140302 2.9988731          3
## 214 214 2.8560988 3.3510093          3
## 215 215 3.3315660 2.5843607          3
## 216 216 3.0761561 2.6183031          3
## 217 217 3.4324299 2.6947791          3
## 218 218 2.5208764 2.9306676          3
## 219 219 3.0635919 2.6676517          3
## 220 220 2.8867122 2.7539662          3
## 221 221 3.4006310 2.8754211          3
## 222 222 2.6780094 3.2825047          3
## 223 223 2.6108648 2.6799331          3
## 224 224 2.8211712 3.2369790          3
## 225 225 2.7514521 2.6151687          3
## 226 226 3.0444776 2.6842558          3
## 227 227 3.1433062 3.1563466          3
## 228 228 2.7150084 2.9706850          3
## 229 229 3.0973033 2.7993101          3
## 230 230 2.8818278 2.8024633          3
## 231 231 3.0127776 2.9495391          3
## 232 232 2.9490148 3.0095152          3
## 233 233 3.1147094 3.2221070          3
## 234 234 3.0645069 3.1312490          3
## 235 235 2.9944222 3.0712954          3
## 236 236 3.3457507 3.3244889          3
## 237 237 2.8515466 2.4184105          3
## 238 238 3.0735926 3.1414431          3
## 239 239 3.2692307 3.0904445          3
## 240 240 2.8705738 3.2510183          3
## 241 241 3.2456386 2.6809704          3
## 242 242 3.4209469 3.1657808          3
## 243 243 2.4598513 2.8037340          3
## 244 244 3.0575639 2.7395245          3
## 245 245 3.0764313 3.4775483          3
## 246 246 3.1290985 3.0854845          3
## 247 247 3.2054891 3.3187393          3
## 248 248 3.0558586 2.7255177          3
## 249 249 3.1223712 2.5304063          3
## 250 250 3.0308103 3.2224918          3
## 251 251 3.2480612 3.3634026          3
## 252 252 3.1948875 3.3226132          3
## 253 253 3.1150878 2.6371873          3
## 254 254 2.9311233 2.6060601          3
## 255 255 2.8559950 3.2117923          3
## 256 256 3.1819779 3.1301060          3
## 257 257 3.1573751 2.7412442          3
## 258 258 3.1382019 2.7441654          3
## 259 259 2.9298448 2.9606736          3
## 260 260 2.9816983 2.7416725          3
## 261 261 3.2356440 2.2047582          3
## 262 262 2.8964914 2.9847514          3
## 263 263 2.9328276 3.1378569          3
## 264 264 2.8377260 2.9081859          3
## 265 265 2.6228130 2.6790083          3
## 266 266 3.0542829 3.2467586          3
## 267 267 2.2097800 2.9995353          3
## 268 268 3.1322535 2.4982152          3
## 269 269 2.4058656 2.3843438          3
## 270 270 2.7437235 3.2251702          3
## 271 271 3.1755176 2.6126634          3
## 272 272 3.4758594 3.4672082          3
## 273 273 3.2354723 2.7545875          3
## 274 274 3.1234780 3.0544978          3
## 275 275 3.3764887 2.5237407          3
## 276 276 2.7216100 2.5560206          3
## 277 277 2.8507435 3.0570732          3
## 278 278 2.7876514 2.8955042          3
## 279 279 2.9982702 3.7034991          3
## 280 280 2.7697049 3.1549488          3
## 281 281 3.3192365 2.9823753          3
## 282 282 2.9201597 3.0979474          3
## 283 283 3.5827237 3.1819142          3
## 284 284 3.0160687 2.5572207          3
## 285 285 2.2966058 2.8007284          3
## 286 286 2.6834115 3.2508303          3
## 287 287 2.9341241 2.8462358          3
## 288 288 3.3275874 2.9343699          3
## 289 289 2.9847950 3.6038591          3
## 290 290 2.6611595 2.8693601          3
## 291 291 2.9556805 2.9651621          3
## 292 292 3.0763826 3.0402241          3
## 293 293 2.8864817 3.0387177          3
## 294 294 3.0610897 3.4376058          3
## 295 295 3.3142024 2.9601696          3
## 296 296 2.3653202 3.7197664          3
## 297 297 2.9514580 2.9151938          3
## 298 298 3.2201968 2.6798721          3
## 299 299 3.0463166 2.6714989          3
## 300 300 3.1729396 3.0031066          3
## 301 301 5.0658976 4.1118427          4
## 302 302 4.3960411 4.4339442          4
## 303 303 4.1868534 4.1215591          4
## 304 304 4.1132864 3.8713248          4
## 305 305 3.8033336 4.0169877          4
## 306 306 4.1373131 3.7863779          4
## 307 307 3.4601896 4.3591492          4
## 308 308 4.4199619 3.8747844          4
## 309 309 3.7011369 4.0371309          4
## 310 310 3.8334790 3.6505311          4
## 311 311 3.8187098 3.8243625          4
## 312 312 3.5109187 3.4985160          4
## 313 313 3.6193299 3.8202263          4
## 314 314 4.2024148 4.0312200          4
## 315 315 3.7779494 3.7427711          4
## 316 316 3.7654909 4.2355863          4
## 317 317 4.1164779 3.7456796          4
## 318 318 3.4877721 4.0340104          4
## 319 319 3.5040767 3.9333184          4
## 320 320 4.0742307 4.3381556          4
## 321 321 3.6800416 3.9583730          4
## 322 322 4.2599479 3.6184553          4
## 323 323 3.6916683 4.2870491          4
## 324 324 4.2988735 3.9259953          4
## 325 325 3.6510664 4.2109721          4
## 326 326 3.7882583 3.9926424          4
## 327 327 4.1631527 4.3584797          4
## 328 328 4.2291454 4.1269821          4
## 329 329 4.1560941 3.9657191          4
## 330 330 3.9100256 4.2951495          4
## 331 331 4.1236208 3.8895305          4
## 332 332 3.8239899 4.1661117          4
## 333 333 3.3083151 3.4777662          4
## 334 334 4.3415230 4.1187053          4
## 335 335 3.9312143 4.0712408          4
## 336 336 3.8007796 3.8484348          4
## 337 337 3.9569270 3.4701330          4
## 338 338 3.2766816 3.9932455          4
## 339 339 3.7546550 4.6039884          4
## 340 340 3.6065517 4.3826314          4
## 341 341 4.4218779 3.9231241          4
## 342 342 4.6569220 4.1839227          4
## 343 343 4.0421375 4.1250092          4
## 344 344 4.0905590 3.6298293          4
## 345 345 3.7393145 4.0518412          4
## 346 346 3.6922708 4.0118505          4
## 347 347 3.2629506 3.6010818          4
## 348 348 3.9450430 3.6383833          4
## 349 349 3.6144095 3.9902396          4
## 350 350 3.8335546 4.0751249          4
## 351 351 3.0365398 3.9707823          4
## 352 352 3.5990194 4.5702123          4
## 353 353 4.0892900 3.6139801          4
## 354 354 3.9864916 3.8627303          4
## 355 355 4.0131637 3.8038534          4
## 356 356 4.0252756 4.4329086          4
## 357 357 4.4190276 3.7876161          4
## 358 358 4.1748940 4.2161852          4
## 359 359 3.6860696 3.7491322          4
## 360 360 4.1050336 3.8821676          4
## 361 361 3.6214095 3.9572185          4
## 362 362 3.7729619 4.2351023          4
## 363 363 4.0053699 3.9977893          4
## 364 364 4.2968411 4.1106944          4
## 365 365 4.3472787 3.9834814          4
## 366 366 4.2006914 3.7829213          4
## 367 367 3.9609321 4.5616079          4
## 368 368 4.0589061 4.1490892          4
## 369 369 4.3897579 4.0635033          4
## 370 370 3.8204380 3.5928239          4
## 371 371 4.3655069 4.4265610          4
## 372 372 4.0523961 4.0961256          4
## 373 373 3.6651531 4.1207548          4
## 374 374 3.8623737 4.0098867          4
## 375 375 3.7550230 3.4420118          4
## 376 376 3.7677814 4.5335275          4
## 377 377 4.2884102 3.9644200          4
## 378 378 3.9414273 3.9272499          4
## 379 379 3.5483790 4.6100401          4
## 380 380 4.3476525 3.9796773          4
## 381 381 4.1418682 4.3316494          4
## 382 382 4.1430704 3.8302312          4
## 383 383 4.4167253 4.2131191          4
## 384 384 4.2517418 3.8564708          4
## 385 385 4.2964473 4.3320663          4
## 386 386 3.9881072 4.0797791          4
## 387 387 4.0790616 3.9764928          4
## 388 388 4.2799163 4.5571738          4
## 389 389 3.8732943 4.6022994          4
## 390 390 3.8629210 4.2231740          4
## 391 391 3.3642462 4.5828260          4
## 392 392 3.2886044 3.0858300          4
## 393 393 3.8766071 4.4275967          4
## 394 394 4.1774254 3.9102585          4
## 395 395 3.7310281 4.2649693          4
## 396 396 3.8667199 3.8195696          4
## 397 397 4.4789976 4.0482910          4
## 398 398 3.9797508 3.6611005          4
## 399 399 4.6344643 3.9675443          4
## 400 400 4.1275862 3.4571791          4
## 401 401 4.9528079 5.2055450          5
## 402 402 5.0878482 5.0924140          5
## 403 403 4.6741911 5.1163494          5
## 404 404 5.0023908 4.7641224          5
## 405 405 4.5618298 5.1184738          5
## 406 406 4.7550394 4.9441917          5
## 407 407 4.7763145 4.6512989          5
## 408 408 5.0783865 5.4003769          5
## 409 409 5.0655995 4.8273421          5
## 410 410 5.2608621 5.2467903          5
## 411 411 4.8916297 4.4663501          5
## 412 412 5.3116336 5.2716688          5
## 413 413 4.8995723 5.2050814          5
## 414 414 5.2024178 4.5739802          5
## 415 415 4.8221563 5.0714366          5
## 416 416 4.8676445 5.0584146          5
## 417 417 5.0932675 5.0877407          5
## 418 418 4.7602556 5.0493434          5
## 419 419 4.7298884 4.7414779          5
## 420 420 5.2205917 5.1662224          5
## 421 421 4.8877069 5.0124886          5
## 422 422 4.7700879 4.9376019          5
## 423 423 4.9098111 4.9191843          5
## 424 424 4.8271294 4.8663331          5
## 425 425 5.0847552 5.1179263          5
## 426 426 4.3802274 5.1305315          5
## 427 427 5.4411971 5.3068429          5
## 428 428 4.8435579 5.2774962          5
## 429 429 5.0255470 5.3882214          5
## 430 430 5.2277374 4.9545828          5
## 431 431 5.1579784 5.0488488          5
## 432 432 4.7694586 5.2445188          5
## 433 433 4.8090070 5.5523365          5
## 434 434 5.0644914 5.3426375          5
## 435 435 5.3879294 5.0977684          5
## 436 436 4.6587048 5.2273494          5
## 437 437 5.0713278 4.7908216          5
## 438 438 5.4761548 4.8248370          5
## 439 439 4.7681518 5.0183175          5
## 440 440 5.0139597 5.5861693          5
## 441 441 4.6097091 4.8949456          5
## 442 442 4.6728384 4.9600640          5
## 443 443 4.6685297 5.0280022          5
## 444 444 4.9063547 5.1587622          5
## 445 445 5.5939132 4.7610045          5
## 446 446 4.7176943 4.8838678          5
## 447 447 5.2771058 5.6967682          5
## 448 448 4.8712238 4.7407695          5
## 449 449 5.6537227 5.1934508          5
## 450 450 5.2763778 5.0025880          5
## 451 451 4.7780855 5.2877895          5
## 452 452 5.1183572 5.1296841          5
## 453 453 5.8577331 5.1192475          5
## 454 454 5.2747807 5.1126527          5
## 455 455 4.9210938 5.0188880          5
## 456 456 4.7925643 5.4408702          5
## 457 457 5.0880987 4.7718892          5
## 458 458 4.9638334 4.6413824          5
## 459 459 5.2045986 5.4291573          5
## 460 460 5.1928309 5.4566901          5
## 461 461 4.5885742 4.6579124          5
## 462 462 4.6955974 4.8846240          5
## 463 463 4.9758209 5.4765303          5
## 464 464 5.3771165 5.2072112          5
## 465 465 5.1722732 4.8891495          5
## 466 466 4.8913705 5.0382076          5
## 467 467 4.8732979 4.6625030          5
## 468 468 5.5956956 4.8497412          5
## 469 469 4.9823469 4.7980411          5
## 470 470 4.7359295 5.3876489          5
## 471 471 5.1715411 5.5381872          5
## 472 472 4.7551571 5.6016612          5
## 473 473 4.8647725 4.5283884          5
## 474 474 4.6424446 4.4978754          5
## 475 475 5.3232133 5.0866273          5
## 476 476 5.4520067 4.5584938          5
## 477 477 4.6397623 5.1159071          5
## 478 478 5.3549637 4.9362248          5
## 479 479 5.4418887 4.9147234          5
## 480 480 5.1208872 5.0612488          5
## 481 481 5.2072071 4.9552603          5
## 482 482 4.6556174 4.8887860          5
## 483 483 4.8571581 4.6189618          5
## 484 484 4.6487836 5.3243266          5
## 485 485 4.8955029 5.1528013          5
## 486 486 4.8517082 4.7854893          5
## 487 487 5.3641726 5.0651947          5
## 488 488 5.2241704 4.9324635          5
## 489 489 4.7790376 4.9088442          5
## 490 490 5.0846269 4.9624876          5
## 491 491 5.2265442 5.4841561          5
## 492 492 5.1360284 4.9758763          5
## 493 493 5.3695591 4.6951398          5
## 494 494 5.4136573 4.2168497          5
## 495 495 4.8261880 4.6525210          5
## 496 496 4.9125355 5.1383930          5
## 497 497 4.9937645 5.1260706          5
## 498 498 5.1068414 5.0851730          5
## 499 499 5.1397088 5.3040260          5
## 500 500 4.8450039 4.6849199          5

This is what our data looks like:

data %>% ggplot(aes(x = V1, y = V2, color = factor(true_clust))) +
    geom_point() +
    scale_color_discrete(name = "true cluster")

Now we can cluster it using kmeans++:

data_for_clust <- data %>% select(id, starts_with("V"))
km <- TGL_kmeans_tidy(data_for_clust,
    k = 5,
    metric = "euclid",
    verbose = TRUE
)
## id column: id
## KMEans: will generate seeds
## KMeans into generate seeds
## at seed 0
## add new core from 23 to 0
## at seed 1
## done update min distance
## seed range 350 450
## picked up 411 dist was 3.00847
## add new core from 411 to 1
## at seed 2
## done update min distance
## seed range 300 400
## picked up 249 dist was 1.43524
## add new core from 249 to 2
## at seed 3
## done update min distance
## seed range 250 350
## picked up 323 dist was 0.742448
## add new core from 323 to 3
## at seed 4
## done update min distance
## seed range 200 300
## picked up 130 dist was 0.67049
## add new core from 130 to 4
## KMEans: reassign after init
## KMEans: iter 0
## KMEans: iter 1 changed 7
## KMEans: iter 1
## KMEans: iter 2 changed 3
## KMEans: iter 2
## KMEans: iter 3 changed 0

The returned list contains 3 fields:

names(km)
## [1] "centers" "cluster" "size"

km$centers contains a tibble with a clust column and the coordinates of each cluster center:

km$centers
## # A tibble: 5 × 3
##   clust    V1    V2
##   <int> <dbl> <dbl>
## 1     1  3.04  2.98
## 2     2  1.98  2.00
## 3     3  3.98  4.05
## 4     4  1.00  1.02
## 5     5  5.01  5.02

Clusters are numbered according to reorder_func (see the ‘Custom cluster ordering’ section).

km$cluster contains a tibble with an id column holding the observation id (1:n if no id column was supplied) and a clust column holding the cluster each observation was assigned to:

km$cluster
## # A tibble: 500 × 2
##    id    clust
##    <chr> <int>
##  1 1         4
##  2 2         4
##  3 3         4
##  4 4         4
##  5 5         4
##  6 6         4
##  7 7         4
##  8 8         4
##  9 9         4
## 10 10        4
## # … with 490 more rows

km$size contains a tibble with a clust column and an n column holding the number of points in each cluster:

km$size
## # A tibble: 5 × 2
##   clust     n
##   <int> <int>
## 1     1   104
## 2     2   100
## 3     3    95
## 4     4   100
## 5     5   101

We can now check our clustering performance - the fraction of observations that were classified correctly (note that the match_clusters function is internal to the package and is used only in this vignette):

d <- tglkmeans:::match_clusters(data, km, 5)
sum(d$true_clust == d$new_clust, na.rm = TRUE) / sum(!is.na(d$new_clust))
## [1] 0.982

And plot the results:

d %>% ggplot(aes(x = V1, y = V2, color = factor(new_clust), shape = factor(true_clust))) +
    geom_point() +
    scale_color_discrete(name = "cluster") +
    scale_shape_discrete(name = "true cluster") +
    geom_point(data = km$centers, size = 7, color = "black", shape = "X")

Custom cluster ordering

By default, the clusters are ordered using the following function: hclust(dist(cor(t(centers)))) - hclust of the Euclidean distance of the correlation matrix of the centers.

We can supply our own function to order the clusters using the reorder_func argument. The function is applied to each center, and the clusters are ordered by the result.

km <- TGL_kmeans_tidy(data %>% select(id, starts_with("V")),
    k = 5,
    metric = "euclid",
    verbose = FALSE,
    reorder_func = median
)
km$centers
## # A tibble: 5 × 3
##   clust    V1    V2
##   <int> <dbl> <dbl>
## 1     1 0.999  1.01
## 2     2 1.97   2.00
## 3     3 3.04   2.98
## 4     4 3.98   4.05
## 5     5 5.01   5.02

Missing data

tglkmeans can deal with missing data, as long as at least one dimension is not missing for each observation. For example:

data$V1[sample(1:nrow(data), round(nrow(data) * 0.2))] <- NA
data
##      id        V1        V2 true_clust
## 1     1 0.3767173 0.9473130          1
## 2     2 0.7771952 0.4365945          1
## 3     3        NA 1.3221844          1
## 4     4        NA 1.3151264          1
## 5     5 1.2724007 0.8288909          1
## 6     6 1.1475845 0.7452437          1
## 7     7 0.9039467 0.5520469          1
## 8     8 0.7665078 0.9444070          1
## 9     9 1.0253056 0.7001926          1
## 10   10        NA 0.7252489          1
## 11   11 1.0759952 1.0163106          1
## 12   12        NA 0.7865068          1
## 13   13 0.7875251 1.3497340          1
## 14   14        NA 0.9805805          1
## 15   15        NA 1.2208939          1
## 16   16        NA 1.9791167          1
## 17   17 1.0850682 1.4320827          1
## 18   18 0.9153876 0.9637182          1
## 19   19 1.7361625 1.0328868          1
## 20   20 1.0315178 1.1156138          1
## 21   21        NA 1.1313384          1
## 22   22 1.3445547 1.2253213          1
## 23   23 1.1422975 1.0645733          1
## 24   24 1.2113962 0.8812399          1
## 25   25 1.1844728 1.0250569          1
## 26   26 0.2466337 1.4776891          1
## 27   27 1.6871284 1.2548285          1
## 28   28 0.8059647 1.0578151          1
## 29   29 0.7271369 0.5993674          1
## 30   30        NA 0.7155586          1
## 31   31 1.1210407 1.8089211          1
## 32   32        NA 1.1504575          1
## 33   33 0.8216845 1.2357720          1
## 34   34 1.4522292 0.7543033          1
## 35   35        NA 0.4371217          1
## 36   36 1.0334194 0.5317554          1
## 37   37 0.9241629 1.2746520          1
## 38   38 0.8943483 0.8673550          1
## 39   39 1.0867575 1.3316706          1
## 40   40 0.5684744 1.0766575          1
## 41   41 0.9817616 1.2313768          1
## 42   42 0.6929054 1.2364633          1
## 43   43        NA 1.0941386          1
## 44   44 1.6692845 1.1624366          1
## 45   45 0.9772522 1.1011560          1
## 46   46 0.5720378 1.2177179          1
## 47   47        NA 1.1485001          1
## 48   48 0.4087713 1.0886696          1
## 49   49 0.7916948 1.3920948          1
## 50   50 0.4612196 1.1451793          1
## 51   51 1.0086479 1.0011060          1
## 52   52 1.0070108 0.9573810          1
## 53   53 1.2425391 1.0304969          1
## 54   54 1.3535816 0.7065712          1
## 55   55        NA 0.4952795          1
## 56   56        NA 0.9660392          1
## 57   57 1.3371796 0.8339075          1
## 58   58        NA 0.4536824          1
## 59   59 0.7775405 1.1490041          1
## 60   60        NA 1.0720482          1
## 61   61 0.7466844 0.8542125          1
## 62   62        NA 1.5219069          1
## 63   63 0.8979006 0.6467794          1
## 64   64 1.2014007 1.0265304          1
## 65   65        NA 1.2432242          1
## 66   66 0.7744559 1.0709800          1
## 67   67 0.8238785 0.8478476          1
## 68   68 0.9416013 0.7349223          1
## 69   69 0.9033257 0.8141168          1
## 70   70 0.4424237 0.5838100          1
## 71   71 0.8096776 1.2962844          1
## 72   72 1.3233941 1.0144875          1
## 73   73 0.8031583 1.6319976          1
## 74   74        NA 1.1116487          1
## 75   75 1.1837934 0.8682858          1
## 76   76 1.6209222 0.3178315          1
## 77   77        NA 0.9164385          1
## 78   78 0.8954044 1.4752830          1
## 79   79 0.6614979 0.6881379          1
## 80   80 0.9894612 1.0304082          1
## 81   81        NA 0.8205572          1
## 82   82 1.2505716 1.0873547          1
## 83   83 1.1761018 1.0517744          1
## 84   84 1.5851996 0.9387091          1
## 85   85 0.9956294 0.8673277          1
## 86   86 1.2387546 1.2683582          1
## 87   87 0.6537069 0.7549864          1
## 88   88 1.3048125 0.6787272          1
## 89   89 0.9281717 0.9858927          1
## 90   90 1.3200650 0.9758446          1
## 91   91 1.4952188 1.0709705          1
## 92   92 1.1864362 0.9516226          1
## 93   93        NA 1.5406666          1
## 94   94 0.9871847 1.0048342          1
## 95   95 0.8256279 1.0609859          1
## 96   96 1.3328611 0.9271259          1
## 97   97 1.3349069 1.0202728          1
## 98   98 0.7626447 1.1762738          1
## 99   99 0.7441846 0.8901470          1
## 100 100        NA 1.4761237          1
## 101 101        NA 2.1917667          2
## 102 102 2.1477589 2.2146340          2
## 103 103 1.7027469 2.0213280          2
## 104 104 2.1463023 2.0638567          2
## 105 105 2.4073865 2.2785103          2
## 106 106 2.1025445 1.3996684          2
## 107 107 2.3094964 1.8137042          2
## 108 108        NA 1.8284215          2
## 109 109 1.2628418 1.7275650          2
## 110 110 1.8729456 1.7514444          2
## 111 111 2.2711728 2.0103012          2
## 112 112 1.8110873 2.0544719          2
## 113 113 1.4373040 2.5330542          2
## 114 114        NA 2.5732635          2
## 115 115        NA 1.3811372          2
## 116 116 1.7594145 1.9725059          2
## 117 117 2.1250675 2.3913515          2
## 118 118 2.2114631 1.8171498          2
## 119 119 1.9167138 2.0340057          2
## 120 120 1.7994075 1.8193712          2
## 121 121 1.8916990 2.0483636          2
## 122 122 1.7157171 2.3094967          2
## 123 123 1.7790713 2.4263179          2
## 124 124 2.2918029 1.0859611          2
## 125 125 1.9785292 2.3038684          2
## 126 126 2.0330563 1.6736483          2
## 127 127 2.8201623 2.2324056          2
## 128 128 2.0486865 1.7985402          2
## 129 129 1.5673919 1.5444837          2
## 130 130 2.2475204 1.3704727          2
## 131 131 2.1664586 2.1267505          2
## 132 132 2.1036168 1.8198495          2
## 133 133        NA 1.6671382          2
## 134 134 1.9945100 1.9456569          2
## 135 135 1.8337013 2.0862003          2
## 136 136 2.3137056 1.6447256          2
## 137 137 1.5789334 1.4890468          2
## 138 138 2.2722327 2.0891428          2
## 139 139 2.3241516 2.0692744          2
## 140 140 2.1932210 2.1691581          2
## 141 141        NA 1.7113753          2
## 142 142 2.6543953 2.2280597          2
## 143 143        NA 1.8805143          2
## 144 144 1.9022112 1.8386501          2
## 145 145 2.0550513 2.2096079          2
## 146 146 1.8822028 2.2294607          2
## 147 147 2.0534039 2.6261139          2
## 148 148 2.1858917 2.5393751          2
## 149 149 2.2581082 1.6637601          2
## 150 150        NA 2.0051653          2
## 151 151 2.0608000 1.7146325          2
## 152 152        NA 2.0572412          2
## 153 153 1.7489826 1.7053830          2
## 154 154 1.9614406 2.0093789          2
## 155 155        NA 2.1460354          2
## 156 156 2.1578004 1.3326680          2
## 157 157 1.8280531 1.9106273          2
## 158 158 2.1618254 2.2173712          2
## 159 159 2.4764937 2.3472773          2
## 160 160 1.5468318 2.3676396          2
## 161 161 1.8788354 2.4492316          2
## 162 162 1.6990724 2.0442601          2
## 163 163 2.2534504 1.9472414          2
## 164 164 2.1236685 2.1272375          2
## 165 165 2.2178681 2.6039467          2
## 166 166 1.9941017 1.8509873          2
## 167 167 1.8064223 1.8947630          2
## 168 168 2.1018646 1.5397721          2
## 169 169 2.1232171 2.1360815          2
## 170 170        NA 2.1201713          2
## 171 171 1.6443690 1.9995858          2
## 172 172 1.6683686 2.5353654          2
## 173 173 1.4272309 1.9921655          2
## 174 174        NA 2.3628861          2
## 175 175        NA 2.5421528          2
## 176 176 2.0555251 1.4521124          2
## 177 177 1.6892600 1.4476947          2
## 178 178 1.6147121 2.0013555          2
## 179 179 1.3534581 1.8987578          2
## 180 180 2.3528521 1.9627606          2
## 181 181 1.6662967 2.0770224          2
## 182 182 2.3474273 1.4765016          2
## 183 183 1.5646400 1.5292227          2
## 184 184        NA 1.8891854          2
## 185 185        NA 2.0247285          2
## 186 186 2.0877701 1.9065968          2
## 187 187 2.2941633 1.8008229          2
## 188 188 2.3978545 2.1953938          2
## 189 189 2.0319868 2.5153169          2
## 190 190 1.7046119 1.7643745          2
## 191 191        NA 1.6632780          2
## 192 192 2.0982337 2.1636975          2
## 193 193 2.5543183 2.2031899          2
## 194 194        NA 2.7916297          2
## 195 195        NA 1.7120612          2
## 196 196 1.8446500 2.1530686          2
## 197 197 1.7581656 2.3675160          2
## 198 198 1.7663722 2.1299673          2
## 199 199 2.1242548 1.8132569          2
## 200 200 2.0498030 2.2659930          2
## 201 201 3.4014600 3.5878311          3
## 202 202        NA 2.8851439          3
## 203 203 3.3119046 3.4016763          3
## 204 204 3.2398718 3.1279438          3
## 205 205 2.9069991 3.1139049          3
## 206 206 3.6286900 3.1263548          3
## 207 207 3.0073556 2.9962502          3
## 208 208 3.1202868 3.1757186          3
## 209 209        NA 2.6987379          3
## 210 210 2.9299413 2.9252239          3
## 211 211 2.5500608 2.8270395          3
## 212 212 3.1046580 2.7386459          3
## 213 213 3.4140302 2.9988731          3
## 214 214 2.8560988 3.3510093          3
## 215 215        NA 2.5843607          3
## 216 216 3.0761561 2.6183031          3
## 217 217 3.4324299 2.6947791          3
## 218 218 2.5208764 2.9306676          3
## 219 219        NA 2.6676517          3
## 220 220 2.8867122 2.7539662          3
## 221 221        NA 2.8754211          3
## 222 222 2.6780094 3.2825047          3
## 223 223        NA 2.6799331          3
## 224 224 2.8211712 3.2369790          3
## 225 225 2.7514521 2.6151687          3
## 226 226 3.0444776 2.6842558          3
## 227 227 3.1433062 3.1563466          3
## 228 228 2.7150084 2.9706850          3
## 229 229 3.0973033 2.7993101          3
## 230 230 2.8818278 2.8024633          3
## 231 231 3.0127776 2.9495391          3
## 232 232 2.9490148 3.0095152          3
## 233 233 3.1147094 3.2221070          3
## 234 234 3.0645069 3.1312490          3
## 235 235 2.9944222 3.0712954          3
## 236 236 3.3457507 3.3244889          3
## 237 237 2.8515466 2.4184105          3
## 238 238 3.0735926 3.1414431          3
## 239 239 3.2692307 3.0904445          3
## 240 240 2.8705738 3.2510183          3
## 241 241 3.2456386 2.6809704          3
## 242 242        NA 3.1657808          3
## 243 243 2.4598513 2.8037340          3
## 244 244 3.0575639 2.7395245          3
## 245 245 3.0764313 3.4775483          3
## 246 246 3.1290985 3.0854845          3
## 247 247 3.2054891 3.3187393          3
## 248 248 3.0558586 2.7255177          3
## 249 249        NA 2.5304063          3
## 250 250 3.0308103 3.2224918          3
## 251 251 3.2480612 3.3634026          3
## 252 252 3.1948875 3.3226132          3
## 253 253        NA 2.6371873          3
## 254 254 2.9311233 2.6060601          3
## 255 255 2.8559950 3.2117923          3
## 256 256 3.1819779 3.1301060          3
## 257 257 3.1573751 2.7412442          3
## 258 258 3.1382019 2.7441654          3
## 259 259 2.9298448 2.9606736          3
## 260 260 2.9816983 2.7416725          3
## 261 261 3.2356440 2.2047582          3
## 262 262        NA 2.9847514          3
## 263 263 2.9328276 3.1378569          3
## 264 264        NA 2.9081859          3
## 265 265 2.6228130 2.6790083          3
## 266 266        NA 3.2467586          3
## 267 267 2.2097800 2.9995353          3
## 268 268        NA 2.4982152          3
## 269 269 2.4058656 2.3843438          3
## 270 270 2.7437235 3.2251702          3
## 271 271 3.1755176 2.6126634          3
## 272 272 3.4758594 3.4672082          3
## 273 273 3.2354723 2.7545875          3
## 274 274 3.1234780 3.0544978          3
## 275 275        NA 2.5237407          3
## 276 276 2.7216100 2.5560206          3
## 277 277        NA 3.0570732          3
## 278 278 2.7876514 2.8955042          3
## 279 279 2.9982702 3.7034991          3
## 280 280 2.7697049 3.1549488          3
## 281 281 3.3192365 2.9823753          3
## 282 282 2.9201597 3.0979474          3
## 283 283 3.5827237 3.1819142          3
## 284 284 3.0160687 2.5572207          3
## 285 285        NA 2.8007284          3
## 286 286        NA 3.2508303          3
## 287 287        NA 2.8462358          3
## 288 288 3.3275874 2.9343699          3
## 289 289 2.9847950 3.6038591          3
## 290 290 2.6611595 2.8693601          3
## 291 291 2.9556805 2.9651621          3
## 292 292 3.0763826 3.0402241          3
## 293 293 2.8864817 3.0387177          3
## 294 294 3.0610897 3.4376058          3
## 295 295 3.3142024 2.9601696          3
## 296 296 2.3653202 3.7197664          3
## 297 297 2.9514580 2.9151938          3
## 298 298 3.2201968 2.6798721          3
## 299 299 3.0463166 2.6714989          3
## 300 300 3.1729396 3.0031066          3
## 301 301 5.0658976 4.1118427          4
## 302 302 4.3960411 4.4339442          4
## 303 303 4.1868534 4.1215591          4
## 304 304 4.1132864 3.8713248          4
## 305 305 3.8033336 4.0169877          4
## 306 306 4.1373131 3.7863779          4
## 307 307 3.4601896 4.3591492          4
## 308 308 4.4199619 3.8747844          4
## 309 309 3.7011369 4.0371309          4
## 310 310        NA 3.6505311          4
## 311 311 3.8187098 3.8243625          4
## 312 312 3.5109187 3.4985160          4
## 313 313 3.6193299 3.8202263          4
## 314 314 4.2024148 4.0312200          4
## 315 315 3.7779494 3.7427711          4
## 316 316 3.7654909 4.2355863          4
## 317 317        NA 3.7456796          4
## 318 318 3.4877721 4.0340104          4
## 319 319        NA 3.9333184          4
## 320 320        NA 4.3381556          4
## 321 321 3.6800416 3.9583730          4
## 322 322 4.2599479 3.6184553          4
## 323 323 3.6916683 4.2870491          4
## 324 324        NA 3.9259953          4
## 325 325 3.6510664 4.2109721          4
## 326 326 3.7882583 3.9926424          4
## 327 327 4.1631527 4.3584797          4
## 328 328 4.2291454 4.1269821          4
## 329 329 4.1560941 3.9657191          4
## 330 330 3.9100256 4.2951495          4
## 331 331        NA 3.8895305          4
## 332 332 3.8239899 4.1661117          4
## 333 333 3.3083151 3.4777662          4
## 334 334 4.3415230 4.1187053          4
## 335 335 3.9312143 4.0712408          4
## 336 336 3.8007796 3.8484348          4
## 337 337        NA 3.4701330          4
## 338 338 3.2766816 3.9932455          4
## 339 339 3.7546550 4.6039884          4
## 340 340 3.6065517 4.3826314          4
## 341 341 4.4218779 3.9231241          4
## 342 342        NA 4.1839227          4
## 343 343 4.0421375 4.1250092          4
## 344 344 4.0905590 3.6298293          4
## 345 345 3.7393145 4.0518412          4
## 346 346 3.6922708 4.0118505          4
## 347 347 3.2629506 3.6010818          4
## 348 348 3.9450430 3.6383833          4
## 349 349        NA 3.9902396          4
## 350 350 3.8335546 4.0751249          4
## 351 351 3.0365398 3.9707823          4
## 352 352 3.5990194 4.5702123          4
## 353 353 4.0892900 3.6139801          4
## 354 354 3.9864916 3.8627303          4
## 355 355 4.0131637 3.8038534          4
## 356 356 4.0252756 4.4329086          4
## 357 357 4.4190276 3.7876161          4
## 358 358 4.1748940 4.2161852          4
## 359 359 3.6860696 3.7491322          4
## 360 360        NA 3.8821676          4
## 361 361 3.6214095 3.9572185          4
## 362 362 3.7729619 4.2351023          4
## 363 363 4.0053699 3.9977893          4
## 364 364 4.2968411 4.1106944          4
## 365 365 4.3472787 3.9834814          4
## 366 366 4.2006914 3.7829213          4
## 367 367 3.9609321 4.5616079          4
## 368 368 4.0589061 4.1490892          4
## 369 369 4.3897579 4.0635033          4
## 370 370 3.8204380 3.5928239          4
## 371 371 4.3655069 4.4265610          4
## 372 372 4.0523961 4.0961256          4
## 373 373 3.6651531 4.1207548          4
## 374 374 3.8623737 4.0098867          4
## 375 375 3.7550230 3.4420118          4
## 376 376        NA 4.5335275          4
## 377 377        NA 3.9644200          4
## 378 378        NA 3.9272499          4
## 379 379 3.5483790 4.6100401          4
## 380 380        NA 3.9796773          4
## 381 381 4.1418682 4.3316494          4
## 382 382 4.1430704 3.8302312          4
## 383 383 4.4167253 4.2131191          4
## 384 384 4.2517418 3.8564708          4
## 385 385 4.2964473 4.3320663          4
## 386 386 3.9881072 4.0797791          4
## 387 387 4.0790616 3.9764928          4
## 388 388 4.2799163 4.5571738          4
## 389 389 3.8732943 4.6022994          4
## 390 390 3.8629210 4.2231740          4
## 391 391        NA 4.5828260          4
## 392 392 3.2886044 3.0858300          4
## 393 393 3.8766071 4.4275967          4
## 394 394 4.1774254 3.9102585          4
## 395 395        NA 4.2649693          4
## 396 396        NA 3.8195696          4
## 397 397 4.4789976 4.0482910          4
## 398 398 3.9797508 3.6611005          4
## 399 399        NA 3.9675443          4
## 400 400 4.1275862 3.4571791          4
## 401 401 4.9528079 5.2055450          5
## 402 402 5.0878482 5.0924140          5
## 403 403 4.6741911 5.1163494          5
## 404 404        NA 4.7641224          5
## 405 405        NA 5.1184738          5
## 406 406 4.7550394 4.9441917          5
## 407 407 4.7763145 4.6512989          5
## 408 408 5.0783865 5.4003769          5
## 409 409 5.0655995 4.8273421          5
## 410 410 5.2608621 5.2467903          5
## 411 411 4.8916297 4.4663501          5
## 412 412 5.3116336 5.2716688          5
## 413 413        NA 5.2050814          5
## 414 414 5.2024178 4.5739802          5
## 415 415 4.8221563 5.0714366          5
## 416 416        NA 5.0584146          5
## 417 417 5.0932675 5.0877407          5
## 418 418 4.7602556 5.0493434          5
## 419 419 4.7298884 4.7414779          5
## 420 420 5.2205917 5.1662224          5
## 421 421 4.8877069 5.0124886          5
## 422 422        NA 4.9376019          5
## 423 423 4.9098111 4.9191843          5
## 424 424 4.8271294 4.8663331          5
## 425 425 5.0847552 5.1179263          5
## 426 426 4.3802274 5.1305315          5
## 427 427 5.4411971 5.3068429          5
## 428 428 4.8435579 5.2774962          5
## 429 429 5.0255470 5.3882214          5
## 430 430        NA 4.9545828          5
## 431 431 5.1579784 5.0488488          5
## 432 432 4.7694586 5.2445188          5
## 433 433 4.8090070 5.5523365          5
## 434 434 5.0644914 5.3426375          5
## 435 435 5.3879294 5.0977684          5
## 436 436        NA 5.2273494          5
## 437 437 5.0713278 4.7908216          5
## 438 438 5.4761548 4.8248370          5
## 439 439        NA 5.0183175          5
## 440 440 5.0139597 5.5861693          5
## 441 441        NA 4.8949456          5
## 442 442        NA 4.9600640          5
## 443 443 4.6685297 5.0280022          5
## 444 444 4.9063547 5.1587622          5
## 445 445        NA 4.7610045          5
## 446 446        NA 4.8838678          5
## 447 447        NA 5.6967682          5
## 448 448 4.8712238 4.7407695          5
## 449 449 5.6537227 5.1934508          5
## 450 450 5.2763778 5.0025880          5
## 451 451 4.7780855 5.2877895          5
## 452 452 5.1183572 5.1296841          5
## 453 453 5.8577331 5.1192475          5
## 454 454        NA 5.1126527          5
## 455 455 4.9210938 5.0188880          5
## 456 456 4.7925643 5.4408702          5
## 457 457 5.0880987 4.7718892          5
## 458 458 4.9638334 4.6413824          5
## 459 459 5.2045986 5.4291573          5
## 460 460 5.1928309 5.4566901          5
## 461 461 4.5885742 4.6579124          5
## 462 462 4.6955974 4.8846240          5
## 463 463        NA 5.4765303          5
## 464 464        NA 5.2072112          5
## 465 465 5.1722732 4.8891495          5
## 466 466 4.8913705 5.0382076          5
## 467 467 4.8732979 4.6625030          5
## 468 468 5.5956956 4.8497412          5
## 469 469 4.9823469 4.7980411          5
## 470 470        NA 5.3876489          5
## 471 471        NA 5.5381872          5
## 472 472        NA 5.6016612          5
## 473 473 4.8647725 4.5283884          5
## 474 474        NA 4.4978754          5
## 475 475 5.3232133 5.0866273          5
## 476 476 5.4520067 4.5584938          5
## 477 477 4.6397623 5.1159071          5
## 478 478 5.3549637 4.9362248          5
## 479 479 5.4418887 4.9147234          5
## 480 480 5.1208872 5.0612488          5
## 481 481 5.2072071 4.9552603          5
## 482 482 4.6556174 4.8887860          5
## 483 483 4.8571581 4.6189618          5
## 484 484 4.6487836 5.3243266          5
## 485 485        NA 5.1528013          5
## 486 486 4.8517082 4.7854893          5
## 487 487 5.3641726 5.0651947          5
## 488 488 5.2241704 4.9324635          5
## 489 489 4.7790376 4.9088442          5
## 490 490 5.0846269 4.9624876          5
## 491 491 5.2265442 5.4841561          5
## 492 492 5.1360284 4.9758763          5
## 493 493 5.3695591 4.6951398          5
## 494 494 5.4136573 4.2168497          5
## 495 495 4.8261880 4.6525210          5
## 496 496 4.9125355 5.1383930          5
## 497 497 4.9937645 5.1260706          5
## 498 498 5.1068414 5.0851730          5
## 499 499        NA 5.3040260          5
## 500 500 4.8450039 4.6849199          5
# Cluster the points into k = 5 groups with the euclidean metric, passing the
# id column together with every V* coordinate column.
km <- data %>%
    select(id, starts_with("V")) %>%
    TGL_kmeans_tidy(k = 5, metric = "euclid", verbose = FALSE)

# Pair the fitted clusters with the simulated labels. The result carries
# `true_clust` and `new_clust` columns (presumably relabelled so that the two
# are comparable -- match_clusters is an internal helper, confirm in its
# source).
d <- tglkmeans:::match_clusters(data, km, 5)

# Accuracy: rows where the assigned cluster equals the true label, divided by
# the number of rows that received a cluster assignment.
with(d, sum(true_clust == new_clust, na.rm = TRUE) / sum(!is.na(new_clust)))
## [1] 0.962

Plotting the results (without the NAs), we get:

# Scatter plot of the two coordinates: color encodes the assigned cluster and
# point shape encodes the true cluster. Rows with a missing coordinate cannot
# be drawn, which is what triggers the "Removed ... rows" warning.
ggplot(
    d,
    aes(x = V1, y = V2, color = factor(new_clust), shape = factor(true_clust))
) +
    geom_point() +
    scale_color_discrete(name = "cluster") +
    scale_shape_discrete(name = "true cluster") +
    # Overlay the fitted cluster centers as large black "X" marks.
    geom_point(data = km$centers, shape = "X", color = "black", size = 7)
## Warning: Removed 100 rows containing missing values (`geom_point()`).

High dimensions

Let’s move to higher dimensions (and higher noise):

# Simulate a much larger problem: 30 clusters in 300 dimensions, using
# n = 100 and sd = 0.3.
data <- simulate_data(
    n = 100,
    sd = 0.3,
    nclust = 30,
    dims = 300
)

# Cluster on the id column plus every V* coordinate column, now with k = 30.
km <- data %>%
    select(id, starts_with("V")) %>%
    TGL_kmeans_tidy(k = 30, metric = "euclid", verbose = FALSE)

# Pair the fitted clusters with the simulated labels; the result carries
# `true_clust` and `new_clust` columns.
d <- tglkmeans:::match_clusters(data, km, 30)

# Accuracy: rows where the assigned cluster equals the true label, divided by
# the number of rows that received a cluster assignment.
with(d, sum(true_clust == new_clust, na.rm = TRUE) / sum(!is.na(new_clust)))
## [1] 1

Comparison with R vanilla kmeans

Let’s compare it to R vanilla kmeans:

# Baseline: base R kmeans on the coordinate columns only, with 30 centers.
km_standard <- kmeans(data %>% select(starts_with("V")), 30)

# Attach an id -> cluster table under `clust` so the fit can be handed to
# match_clusters (presumably the element it reads -- confirm in its source).
# Ids are row positions, which assumes data$id runs 1..nrow(data).
# seq_len() is used instead of 1:nrow(data), which would produce c(1, 0) for
# an empty table.
km_standard$clust <- tibble(
    id = seq_len(nrow(data)),
    clust = km_standard$cluster
)

# Same accuracy measure as for the tglkmeans fit: matching labels divided by
# the number of rows that received a cluster assignment.
d <- tglkmeans:::match_clusters(data, km_standard, 30)
sum(d$true_clust == d$new_clust, na.rm = TRUE) / sum(!is.na(d$new_clust))
## [1] 0.6818182

We can see that kmeans++ recovers the true clusters far better than R vanilla kmeans on this data (an accuracy of 1 versus about 0.68).

Random seed

We can set the seed for the C++ random number generator to get reproducible results:

# Fit the same data twice with an identical seed (60427) and identical
# settings.
km1 <- data %>%
    select(id, starts_with("V")) %>%
    TGL_kmeans_tidy(k = 30, metric = "euclid", verbose = FALSE, seed = 60427)
km2 <- data %>%
    select(id, starts_with("V")) %>%
    TGL_kmeans_tidy(k = 30, metric = "euclid", verbose = FALSE, seed = 60427)

# The two runs are reproducible if every center coordinate is exactly equal.
# The first column of `centers` is dropped before comparing (presumably the
# cluster identifier -- confirm against the TGL_kmeans_tidy output).
all(km1$centers[, -1] == km2$centers[, -1])
## [1] TRUE