Browse Source

Add kmeans clustering.

HEAD
Yang Luo 3 years ago
parent
commit
6feb77ab8d
8 changed files with 119 additions and 7 deletions
  1. +2
    -0
      go.mod
  2. +10
    -0
      go.sum
  3. +0
    -5
      object/dataset.go
  4. +4
    -2
      object/dataset_graph.go
  5. +67
    -0
      object/kmeans.go
  6. +9
    -0
      object/kmeans_test.go
  7. +21
    -0
      object/vector.go
  8. +6
    -0
      util/color.go

+ 2
- 0
go.mod View File

@@ -6,6 +6,8 @@ require (
github.com/astaxie/beego v1.12.3
github.com/casdoor/casdoor-go-sdk v0.3.3
github.com/go-sql-driver/mysql v1.6.0
github.com/muesli/clusters v0.0.0-20200529215643-2700303c1762
github.com/muesli/kmeans v0.3.0
github.com/tealeg/xlsx v1.0.5
xorm.io/core v0.7.3
xorm.io/xorm v1.2.5


+ 10
- 0
go.sum View File

@@ -35,6 +35,7 @@ gitea.com/xorm/sqlfiddle v0.0.0-20180821085327-62ce714f951a h1:lSA0F4e9A2NcQSqGq
gitea.com/xorm/sqlfiddle v0.0.0-20180821085327-62ce714f951a/go.mod h1:EXuID2Zs0pAQhH8yz+DNjUbjppKQzKFAn28TMYPB6IU=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/DATA-DOG/go-sqlmock v1.3.3/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM=
github.com/Knetic/govaluate v3.0.0+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0=
github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0=
github.com/Masterminds/semver/v3 v3.1.1/go.mod h1:VPu/7SZ7ePZ3QOrcuXROw5FAcLl4a0cBrbBpGY/8hQs=
@@ -149,6 +150,7 @@ github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zV
github.com/golang-jwt/jwt/v4 v4.1.0 h1:XUgk2Ex5veyVFVeLm0xhusUTQybEbexJXrvPNOKkSY0=
github.com/golang-jwt/jwt/v4 v4.1.0/go.mod h1:/xlHOz8bRuivTWchD4jCa+NbatV+wEUSzwAxVc6locg=
github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe/go.mod h1:8vg3r2VgvsThLBIFL93Qb5yWzgyZWhEmBwUJWevAkK0=
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
@@ -319,6 +321,7 @@ github.com/lib/pq v1.3.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
github.com/lib/pq v1.10.2/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
github.com/lightstep/lightstep-tracer-common/golang/gogo v0.0.0-20190605223551-bc2310a04743/go.mod h1:qklhhLq1aX+mtWk9cPHPzaBjWImj5ULL6C7HFJtXQMM=
github.com/lightstep/lightstep-tracer-go v0.18.1/go.mod h1:jlF1pusYV4pidLvZ+XD0UBX0ZE6WURAspgAczcDHrL4=
github.com/lucasb-eyer/go-colorful v1.0.2/go.mod h1:0MS4r+7BZKSJ5mw4/S5MPN+qHFF1fYclkSPilDOKW0s=
github.com/lyft/protoc-gen-validate v0.0.13/go.mod h1:XbGvPuh87YZc5TdIa2/I4pLk0QoUACkjt2znoq26NVQ=
github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU=
github.com/mattn/go-colorable v0.1.1/go.mod h1:FuOcm+DKB9mbwrcAfNl7/TZVBZ6rcnceauSikq3lYCQ=
@@ -354,6 +357,11 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJ
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI=
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/muesli/clusters v0.0.0-20180605185049-a07a36e67d36/go.mod h1:mw5KDqUj0eLj/6DUNINLVJNoPTFkEuGMHtJsXLviLkY=
github.com/muesli/clusters v0.0.0-20200529215643-2700303c1762 h1:p4A2Jx7Lm3NV98VRMKlyWd3nqf8obft8NfXlAUmqd3I=
github.com/muesli/clusters v0.0.0-20200529215643-2700303c1762/go.mod h1:mw5KDqUj0eLj/6DUNINLVJNoPTFkEuGMHtJsXLviLkY=
github.com/muesli/kmeans v0.3.0 h1:cI2cpeS8m3pm+gTOdzl+7SlzZYSe+x0XoqXUyUvb1ro=
github.com/muesli/kmeans v0.3.0/go.mod h1:eNyybq0tX9/iBEP6EMU4Y7dpmGK0uEhODdZpnG1a/iQ=
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/nats-io/jwt v0.3.0/go.mod h1:fRYCDE99xlTsqUzISS1Bi75UBJ6ljOJQOAAu5VglpSg=
github.com/nats-io/jwt v0.3.2/go.mod h1:/euKqTS1ZD+zzjYrY7pseZrTtWQSjujC7xjPc8wL6eU=
@@ -477,6 +485,7 @@ github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1
github.com/ugorji/go v0.0.0-20171122102828-84cb69a8af83/go.mod h1:hnLbHMwcvSihnDhEfx2/BzKp2xb0Y+ErdfYcrs9tkJQ=
github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
github.com/wcharczuk/go-chart/v2 v2.1.0/go.mod h1:yx7MvAVNcP/kN9lKXM/NTce4au4DFN99j6i1OwDclNA=
github.com/wendal/errors v0.0.0-20130201093226-f66c77a7882b/go.mod h1:Q12BUT7DqIlHRmgv3RskH+UCM/4eqVMgI0EMmlSpAXc=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
@@ -535,6 +544,7 @@ golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EH
golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU=
golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/image v0.0.0-20200927104501-e162460cd6b5/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=


+ 0
- 5
object/dataset.go View File

@@ -7,11 +7,6 @@ import (
"xorm.io/core"
)

// Vector is a named list of float64 values.
type Vector struct {
	// Name is the name of the vector.
	Name string `xorm:"varchar(100)" json:"name"`
	// Data holds the numeric components of the vector.
	Data []float64 `xorm:"varchar(1000)" json:"data"`
}

type Dataset struct {
Owner string `xorm:"varchar(100) notnull pk" json:"owner"`
Name string `xorm:"varchar(100) notnull pk" json:"name"`


+ 4
- 2
object/dataset_graph.go View File

@@ -91,7 +91,7 @@ func generateGraph(vectors []*Vector) *Graph {

linkValue := (1*(distance-7) + 10*(DistanceLimit-1-distance)) / (DistanceLimit - 8)
linkColor := "rgb(44,160,44,0.6)"
linkName := fmt.Sprintf("[%s] - [%s]: distance = %d, linkValue = %d", v1.Name, v2.Name, distance, linkValue)
linkName := fmt.Sprintf("Edge [%s] - [%s]: distance = %d, linkValue = %d", v1.Name, v2.Name, distance, linkValue)
fmt.Println(linkName)
g.addLink(linkName, v1.Name, v2.Name, linkValue, linkColor, "")
}
@@ -102,8 +102,10 @@ func generateGraph(vectors []*Vector) *Graph {
value := int(math.Sqrt(float64(nodeWeightMap[vector.Name]))) + 3

//nodeColor := "rgb(232,67,62)"
nodeColor := getNodeColor(value)
//nodeColor := getNodeColor(value)
nodeColor := vector.Color

fmt.Printf("Node [%s]: weight = %d, nodeValue = %d\n", vector.Name, nodeWeightMap[vector.Name], value)
g.addNode(vector.Name, vector.Name, value, nodeColor, "")
}



+ 67
- 0
object/kmeans.go View File

@@ -0,0 +1,67 @@
package object

import (
"fmt"
"strconv"
"strings"

"github.com/casbin/casbase/util"
"github.com/muesli/clusters"
"github.com/muesli/kmeans"
)

// fa2Str renders a float slice as a single string: every element is formatted
// with the "%f" verb and the pieces are joined with "|". A nil or empty slice
// yields the empty string.
func fa2Str(floatArray []float64) string {
	parts := make([]string, len(floatArray))
	for i := range floatArray {
		parts[i] = fmt.Sprintf("%f", floatArray[i])
	}
	return strings.Join(parts, "|")
}

// runKmeans clusters the given vectors with k-means and writes the result
// back onto each vector: Category becomes the decimal index of the cluster
// the vector landed in, and Color a random color shared by every member of
// that cluster.
//
// Vectors whose Data is empty are skipped and left untouched. The function
// panics if the partitioning returns an error or if a clustered observation
// cannot be mapped back to an input vector.
func runKmeans(vectors []*Vector) {
	// Number of clusters requested from the k-means implementation.
	const clusterCount = 100

	// The observations handed to the clustering are bare coordinates, so the
	// formatted coordinates serve as the key to find the vectors again
	// afterwards. Several vectors may share the same data (and therefore the
	// same key); each key maps to all of them so that duplicates are not
	// silently dropped and left without a category or color.
	vectorMap := map[string][]*Vector{}

	var d clusters.Observations
	for _, vector := range vectors {
		if len(vector.Data) == 0 {
			continue
		}

		dataKey := vector.GetDataKey()
		vectorMap[dataKey] = append(vectorMap[dataKey], vector)

		d = append(d, clusters.Coordinates(vector.Data))
	}

	km := kmeans.New()
	cs, err := km.Partition(d, clusterCount)
	if err != nil {
		panic(err)
	}

	for i, c := range cs {
		// Only the first two dimensions are logged as x/y. Guard the indexing
		// so that lower-dimensional data cannot trigger an out-of-range panic.
		if len(c.Center) >= 2 {
			fmt.Printf("Centered at x: %.2f y: %.2f\n", c.Center[0], c.Center[1])
		} else {
			fmt.Printf("Centered at: %.2f\n", []float64(c.Center))
		}
		fmt.Printf("Matching data points: %+v\n\n", c.Observations)

		// One color per cluster, applied to every vector in it.
		color := util.GetRandomColor()
		category := strconv.Itoa(i)
		for _, observation := range c.Observations {
			floatArray := observation.Coordinates()
			dataKey := fa2Str(floatArray)

			matches, ok := vectorMap[dataKey]
			if !ok {
				panic(fmt.Errorf("vectorMap vector not found, dataKey = %s", dataKey))
			}
			for _, vector := range matches {
				vector.Category = category
				vector.Color = color
			}
		}
	}
}

// updateDatasetVectorCategories fetches the dataset identified by owner and
// datasetName, runs k-means over its vectors so that they carry category and
// color assignments, and then passes the modified dataset to UpdateDataset
// under the dataset's own id.
func updateDatasetVectorCategories(owner string, datasetName string) {
	ds := getDataset(owner, datasetName)

	// runKmeans mutates the vectors in place through their pointers.
	runKmeans(ds.Vectors)

	UpdateDataset(ds.GetId(), ds)
}

+ 9
- 0
object/kmeans_test.go View File

@@ -0,0 +1,9 @@
package object

import "testing"

// TestUpdateDatasetVectorCategories initializes the configuration and then
// recomputes the vector categories of the "word" dataset owned by "admin".
// It makes no assertions of its own; it only exercises the update path.
func TestUpdateDatasetVectorCategories(t *testing.T) {
	const (
		owner       = "admin"
		datasetName = "word"
	)

	InitConfig()

	updateDatasetVectorCategories(owner, datasetName)
}

+ 21
- 0
object/vector.go View File

@@ -0,0 +1,21 @@
package object

import (
"fmt"
"strings"
)

// Vector is a named list of float64 values together with the category and
// color assigned to it.
type Vector struct {
	// Name is the name of the vector.
	Name string `xorm:"varchar(100)" json:"name"`
	// Category is the category label assigned to the vector.
	Category string `xorm:"varchar(100)" json:"category"`
	// Color is the color string assigned to the vector.
	Color string `xorm:"varchar(100)" json:"color"`
	// Data holds the numeric components of the vector.
	Data []float64 `xorm:"varchar(1000)" json:"data"`
}

// GetDataKey returns a string key derived from the vector's Data: each value
// is formatted with the "%f" verb and the results are joined with "|". A
// vector without data yields the empty string.
func (v *Vector) GetDataKey() string {
	parts := make([]string, 0, len(v.Data))
	for i := range v.Data {
		parts = append(parts, fmt.Sprintf("%f", v.Data[i]))
	}
	return strings.Join(parts, "|")
}

+ 6
- 0
util/color.go View File

@@ -1,8 +1,10 @@
package util

import (
"fmt"
"image/color"
"math"
"math/rand"
)

func mixChannel(a uint8, b uint8, t float64) uint8 {
@@ -18,3 +20,7 @@ func MixColor(c1 color.RGBA, c2 color.RGBA, t float64) color.RGBA {
}
return res
}

// GetRandomColor returns a color string of the form "rgb(R,G,B)" in which
// each of the three channels is a separately drawn pseudo-random integer in
// the range [0, 255].
func GetRandomColor() string {
	red := rand.Intn(256)
	green := rand.Intn(256)
	blue := rand.Intn(256)
	return fmt.Sprintf("rgb(%d,%d,%d)", red, green, blue)
}

Loading…
Cancel
Save