创建用于训练的数据表并插入训练数据。
-- Recreate the training table from scratch so the example can be re-run.
DROP TABLE IF EXISTS km_sample;

CREATE TABLE km_sample (
    pid    INT,  -- identifier of each sample point
    -- Declared as BLOB; the column comment is reproduced verbatim
    -- (presumably this is what marks the column as a double[] array in GBase; confirm).
    points BLOB COMMENT 'gbase_array_type double[]'
);
-- Load the 16 two-dimensional training points: four points in each quadrant,
-- so a 4-cluster run has an obvious expected grouping.
-- Page header/footer text that had been spliced into the VALUES list
-- (between pid 12 and pid 13) is removed so the statement is valid SQL.
INSERT INTO km_sample (pid, points)
VALUES
    (1,  ARRAY DOUBLE [1100, 1100]),
    (2,  ARRAY DOUBLE [1220, 1110]),
    (3,  ARRAY DOUBLE [-1080, 1190]),
    (4,  ARRAY DOUBLE [1100, -1100]),
    (5,  ARRAY DOUBLE [1080, -1190]),
    (6,  ARRAY DOUBLE [1080, 1190]),
    (7,  ARRAY DOUBLE [1220, -1110]),
    (8,  ARRAY DOUBLE [-1100, -1100]),
    (9,  ARRAY DOUBLE [-1080, -1190]),
    (10, ARRAY DOUBLE [-1220, -1110]),
    (11, ARRAY DOUBLE [1300, 1400]),
    (12, ARRAY DOUBLE [-1300, -1400]),
    (13, ARRAY DOUBLE [-1100, 1100]),
    (14, ARRAY DOUBLE [1301, -1400]),
    (15, ARRAY DOUBLE [-1220, 1110]),
    (16, ARRAY DOUBLE [-1300, 1400]);
对输入进行聚类，以 kmeanspp 为例：
-- Run k-means clustering (k-means++ variant) on the sample table.
-- NOTE(review): the per-argument meanings below are inferred from the MADlib
-- kmeanspp signature; confirm against the GBase Mllib reference.
SELECT Mllib.kmeanspp(
    'madtest.km_sample',   -- source table; assumes km_sample was created in database madtest
    'points',              -- column holding the point coordinates
    4,                     -- number of clusters
    'squared_dist_norm2',  -- distance function
    20,                    -- presumably the maximum number of iterations
    0.001,                 -- presumably the minimum fraction of reassigned points (stop threshold)
    1.0                    -- presumably the seeding sample ratio
);
查看结果表。
gbase> SELECT iteration, array_text(centroids), frac_reassigned FROM km_sample_result \G
*************************** 1. row ***************************
iteration: 3
array_text(centroids): {{-1175,1200},{-1175,-1200},{1175.25,-1200},{1175,1200}}
frac_reassigned: 0
1 row in set (Elapsed: 00:00:00.00)
通过 closest_column 函数显示具体分组：
gbase> SELECT
    _src.pid AS pid,
    array_text(_src.points) AS point,
    -- Label each point with the position of its nearest centroid in the result array.
    closest_column(
        -- Centroids from the clustering run; this is a scalar subquery,
        -- so km_sample_result is expected to hold a single row.
        (
            SELECT rel_result.centroids
            FROM km_sample_result AS rel_result
        ),
        _src.points,
        'squared_dist_norm2',
        -- NOTE(review): the metric name is passed twice in the source; kept as-is.
        -- Confirm against the closest_column signature that both are required.
        'squared_dist_norm2'
    ) AS cluster_id
FROM km_sample AS _src
-- pid is a tie-breaker so rows within a cluster come back in a stable order.
ORDER BY cluster_id, pid;
+------+---------------+------------+
| pid  | point         | cluster_id |
+------+---------------+------------+
|    3 | {-1080,1190}  |          0 |
|   13 | {-1100,1100}  |          0 |
|   15 | {-1220,1110}  |          0 |
|   16 | {-1300,1400}  |          0 |
|    8 | {-1100,-1100} |          1 |
|    9 | {-1080,-1190} |          1 |
|   10 | {-1220,-1110} |          1 |
|   12 | {-1300,-1400} |          1 |
|    4 | {1100,-1100}  |          2 |
|    5 | {1080,-1190}  |          2 |
|    7 | {1220,-1110}  |          2 |
|   14 | {1301,-1400}  |          2 |
|    1 | {1100,1100}   |          3 |
|    2 | {1220,1110}   |          3 |
|    6 | {1080,1190}   |          3 |
|   11 | {1300,1400}   |          3 |
+------+---------------+------------+