# Construct sub_correlation_matrix: the square block of coco_correlation_A_B
# restricted to the classes listed in classes_in_current_split_group.
    sub_correlation_matrix=np.zeros([len(classes_in_current_split_group),len(classes_in_current_split_group)])
    # print("sub_correlation_matrix before initialize: \n",sub_correlation_matrix)
    for row_idx in range(len(classes_in_current_split_group)):
        for col_idx in range(len(classes_in_current_split_group)):
            # row_idx/col_idx address the sub-matrix; the list maps them back to
            # the row/column indices of the full correlation matrix.
            sub_correlation_matrix[row_idx,col_idx]=coco_correlation_A_B[classes_in_current_split_group[row_idx],classes_in_current_split_group[col_idx]]
    # print("sub_correlation_matrix after initialize: \n",sub_correlation_matrix)

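先创建一个 list（即 classes_in_current_split_group），其中保存需要从原矩阵中取出的行下标和列下标。

然后用 for 循环按下标遍历该 list 中的元素。

子矩阵行下标为 row_idx，对应原矩阵行下标为 classes_in_current_split_group[row_idx]；列下标 col_idx 同理，对应原矩阵列下标 classes_in_current_split_group[col_idx]。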

二、字典、list、array的添加与遍历

2.1 添加元素

# Start from an empty dict, then store the list classes_number under key 1.
split_groups={} 
split_groups[1]=classes_number

直接字典[key]=key_value

中括号内为键，等号后为键值

print该字典为

{1: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}

2.2 遍历字典

直接for 变量 in 字典

则变量即为key，字典[变量] 即字典中元素

# Iterating over a dict yields its keys; split_groups[key] is the value stored
# under that key (here a list, so len() gives the number of group elements).
for key in split_groups: 
    print("\ngroup:",key,
    " group element numbers",len(split_groups[key]),
    "\ngroup_elements",split_groups[key])

2.3 添加list元素

直接 listname.append(需要加入的元素)

# Class indices of the labels: 0, 1, 2, ..., 79 (range(0, 80) stops before 80).
classes_number=[]
for class_idx in range(0,80):
    # append adds one element to the end of the list
    classes_number.append(class_idx)

2.4 遍历list

    # Traverse the list by index: range(len(...)) yields 0 .. len-1, and
    # classes_in_current_split_group[idx] reads the element at that position.
    for row_idx in range(len(classes_in_current_split_group)):
        for col_idx in range(len(classes_in_current_split_group)):
            sub_correlation_matrix[row_idx,col_idx]=coco_correlation_A_B[classes_in_current_split_group[row_idx],classes_in_current_split_group[col_idx]]

这里需要注意：按下标遍历时要写成 for index in range(len(list_name))；如果直接写 for index in len(list_name)，由于 len() 返回的是一个整数而整数不可迭代，会报错（TypeError: 'int' object is not iterable）。

同时，取元素用中括号 []：list 用 list_name[下标]（嵌套 list 需要逐层索引，如 list_name[i][j]）；numpy 数组则可以在同一个中括号内用逗号隔开不同维度，如 array_name[i, j]。

2.5 np.array

创建数组

https://www.cnblogs.com/hezhefly/p/8278842.html

# Allocate a zero-filled square array with one row and one column per class
# in classes_in_current_split_group.
sub_correlation_matrix=np.zeros([len(classes_in_current_split_group),len(classes_in_current_split_group)])

创建的array名称=np.zeros([维度])

2.6 array中元素

array_name[维度0坐标, 维度1坐标]

例如：

    # Both arrays are indexed as name[row, col]; the right-hand side looks up
    # the full-matrix row/column through classes_in_current_split_group.
    for row_idx in range(len(classes_in_current_split_group)):
        for col_idx in range(len(classes_in_current_split_group)):
            sub_correlation_matrix[row_idx,col_idx]=coco_correlation_A_B[classes_in_current_split_group[row_idx],classes_in_current_split_group[col_idx]]

三、完整程序

#-*-coding:utf-8 -*-
"""
created by xingxinangrui on 2019.5.7
this program is to perform spectral clustering on coco dataset labels
Repeatedly split big groups into two groups until no group has more than max_classes_per_group (default 10) elements

-----------------------1.----------------------------
load coco_correlations.pkl
load coco_names.pkl

in which:

names is a 80 dimension list contains label names
names :  ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck',
......
'hot dog', 'pizza', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']

A_B is a correlation matrix
A_B [[1.00000000e+00 8.26410144e-01 7.04392284e-01 ... 4.03311258e-01
  4.45312500e-01 5.40000000e-01]
 ...
 [8.36764511e-03 1.74901618e-03 6.97188008e-04 ... 3.97350993e-03
  1.40625000e-01 1.00000000e+00]]

A_B.shape (80, 80)
notA_B.shape (80, 80)
A_notB.shape (80, 80)
notA_notB.shape (80, 80)

    correlations = {}
    correlations.update(pp=A_B) #p(A/B)
    correlations.update(fp=notA_B) # P(not A/B)
    correlations.update(pf=A_notB)
    correlations.update(ff=notA_notB)

----------------------2.------------------------
cluster from one big group to two small groups until all group_element_number < max_classes_per_group

"""

import numpy as np
from sklearn import datasets
from sklearn.cluster import SpectralClustering
from sklearn import metrics

import pickle
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import json
import os

import argparse

import warnings
warnings.filterwarnings("ignore")

# Example invocation:
# env/bin/python sk_spectral_cluster/coco_SpecCluster_iter2group.py --max_classes_per_group 10 --probability_filter_threshold 0.1 --show_cluster_process 0

# Command-line interface: the group size limit, the correlation cut-off and a
# switch for printing the intermediate split results.
parser = argparse.ArgumentParser(
    description='coco_label_spectral_clustering_iter_two_groups',
)
parser.add_argument(
    '--max_classes_per_group', '-i',
    type=int,
    default=10,
    metavar='N',
    help='Max classes in each group after cluster',
)
parser.add_argument(
    '--probability_filter_threshold',
    type=float,
    default=0.1,
    help='filter probilities which less than probability_filter_threshold set to 0',
)
parser.add_argument(
    '--show_cluster_process',
    type=int,
    default=1,
    help='if 1 show cluster process, else just show final result',
)

def _filter_weak_correlations(correlation_matrix, threshold):
    """Return a copy of ``correlation_matrix`` with weak entries zeroed.

    Args:
        correlation_matrix: 2-D array of correlation values.
        threshold: entries strictly below this value are replaced by 0.

    Returns:
        A new ``numpy.ndarray``; the input is left unmodified.
    """
    # np.array copies by default, so the loaded data is not altered in place.
    filtered = np.array(correlation_matrix)
    filtered[filtered < threshold] = 0
    return filtered


def _find_oversized_group(split_groups, max_classes_per_group):
    """Return the key of the first group exceeding the size limit, or None."""
    for group_key, members in split_groups.items():
        if len(members) > max_classes_per_group:
            return group_key
    return None


def _split_group_in_two(correlation_matrix, class_indices, gamma=1, max_attempts=5):
    """Split ``class_indices`` into two non-empty lists by spectral clustering.

    Args:
        correlation_matrix: full (filtered) correlation matrix.
        class_indices: row/column indices of the classes in the group to split.
        gamma: ``gamma`` argument forwarded to ``SpectralClustering``.
        max_attempts: how many times clustering is retried when it assigns
            every class to the same cluster.

    Returns:
        ``(group_0, group_1)``: the classes predicted as label 0 and the rest,
        each in the order they appear in ``class_indices``.

    Raises:
        RuntimeError: if no attempt produced two non-empty groups.
    """
    # Square block of the full matrix restricted to this group's classes.
    sub_correlation_matrix = correlation_matrix[
        np.ix_(class_indices, class_indices)
    ].astype(float)

    for _ in range(max_attempts):
        # NOTE(review): no ``affinity`` is passed, so scikit-learn's default is
        # used and the sub-matrix rows are presumably treated as feature
        # vectors rather than as a precomputed affinity -- confirm intended.
        pred_y = SpectralClustering(n_clusters=2, gamma=gamma).fit_predict(
            sub_correlation_matrix
        )
        group_0 = [c for c, label in zip(class_indices, pred_y) if label == 0]
        group_1 = [c for c, label in zip(class_indices, pred_y) if label != 0]
        # An empty half would add an empty group and make no progress, so
        # only accept a genuine two-way split.
        if group_0 and group_1:
            return group_0, group_1

    raise RuntimeError(
        "Spectral clustering failed to split a group of %d classes into two "
        "non-empty groups after %d attempts" % (len(class_indices), max_attempts)
    )


def coco_label_spectral_clustering_iter_two_groups():
    """Iteratively bisect the COCO label set with spectral clustering.

    Reads the command-line options from the module-level ``parser``, loads
    ``coco_correlations.pkl`` and ``coco_names.pkl`` from
    ``sk_spectral_cluster/``, zeroes correlations below
    ``--probability_filter_threshold`` and then repeatedly splits the first
    group holding more than ``--max_classes_per_group`` classes into two,
    until no such group remains. Progress and the final groups (as class
    indices and as class names) are printed to stdout.

    Returns:
        None.

    Raises:
        SystemExit: via ``parser.error`` when ``--max_classes_per_group`` is
            smaller than 1.
        RuntimeError: when a group cannot be split into two non-empty groups.
    """
    # Parse arguments; ``args`` stays a module-level global as before.
    global args
    args = parser.parse_args()
    max_classes_per_group = args.max_classes_per_group
    show_cluster_process = args.show_cluster_process
    probability_filter_threshold = args.probability_filter_threshold

    # A limit below 1 would make even a single-class group "oversized", and a
    # single class cannot be clustered into two groups.
    if max_classes_per_group < 1:
        parser.error("--max_classes_per_group must be at least 1")

    print("-----------------------------------------------------")
    print("-----------------------------------------------------")
    print("-----------------------------------------------------")

    # ---------- load coco_correlations.pkl and coco_names.pkl ----------
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open('sk_spectral_cluster/coco_correlations.pkl', 'rb') as f:
        print("loading coco_correlations.pkl ")
        correlations = pickle.load(f)
    with open('sk_spectral_cluster/coco_names.pkl', 'rb') as f:
        print("loading coco_names.pkl")
        names = pickle.load(f)

    # Entries below the threshold are set to 0 (on a copy of the 'pp' matrix).
    coco_correlation_A_B = _filter_weak_correlations(
        correlations['pp'], probability_filter_threshold
    )

    # ---------- split one big group into two until all groups are small ----------

    # Split groups after clustering, format:
    #   { 1 : [1, 2, 4, 6, 45, ...],
    #     2 : [8, 10, ...],
    #     3 : [55, 66, ...] }
    # Initially a single group holding every class index; the class count is
    # taken from the matrix instead of being hard-coded.
    split_groups = {1: list(range(coco_correlation_A_B.shape[0]))}

    iter_times = 0
    while True:
        current_split_group_idx = _find_oversized_group(
            split_groups, max_classes_per_group
        )

        if current_split_group_idx is None:
            # Message text kept unchanged; "ten" is the default limit.
            print(" All group element number less than ten! program end!")
            print(" Final split groups: ")
            for key in split_groups:
                print("group:", key, "  group element numbers", len(split_groups[key]), "  group_elements :  ", split_groups[key])
            break

        iter_times += 1
        print("Not all element number less than", max_classes_per_group, "  split start, iteration : ", iter_times, "split group: ", current_split_group_idx)

        # ---------- split the selected group ----------
        splited_class_idx_list_0, splited_class_idx_list_1 = _split_group_in_two(
            coco_correlation_A_B, split_groups[current_split_group_idx]
        )

        # The label-0 half replaces the split group; the other half is stored
        # under the next unused key (keys are 1..len and never removed).
        split_groups[current_split_group_idx] = splited_class_idx_list_0
        split_groups[len(split_groups) + 1] = splited_class_idx_list_1

        if show_cluster_process == 1:
            print("Updated split_groups: ")
            for group_idx in split_groups:
                print("group", group_idx, "  group element numbers: ", len(split_groups[group_idx]), "  group_elements : ", split_groups[group_idx])
            print("split ended !\n")

    # ---------- split ended, write final results ----------

    # Translate the class indices of every group into class names.
    class_names_in_groups = {
        group_idx: [names[class_idx] for class_idx in members]
        for group_idx, members in split_groups.items()
    }

    # Print final results.
    print("\nFinal results,group numbers: ", len(split_groups), " max_classes_per_group: ", max_classes_per_group, " probability filter threshold: ", probability_filter_threshold)

    for group_idx in class_names_in_groups:
        print("group:", group_idx, "  group element numbers: ", len(class_names_in_groups[group_idx]), "\ngroup_elements : ", class_names_in_groups[group_idx])


# Script entry point: run the clustering only when this file is executed directly.
if __name__ == "__main__":
    coco_label_spectral_clustering_iter_two_groups()

原文链接：https://blog.csdn.net/weixin_36474809/article/details/89915754