1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
import numpy as np
import pandas as pd
# Min-max normalise every column of ``data`` in place.
#
# Columns whose name is in ``negative_indicators`` are reverse-scaled as
# (max - x) / (max - min), so their largest raw value maps to 0.  Every other
# column is scaled as (x - min) / (max - min), so its largest raw value maps
# to 1.
#
# The previous condition was ``i == 'a' or 'b' or ...``.  A non-empty string
# literal is always truthy, so that expression was true for every column and
# the ``else`` branch could never run.  A membership test compares the column
# name against each label, which is what the two branches require.
negative_indicators = {'船舶运价', '电厂库存', '煤炭库存', '液化天然气产量', '原煤产量'}

for i in data.columns:
    col_max = np.max(data[i])
    col_min = np.min(data[i])
    # NOTE: a constant column has col_max == col_min, so the division below
    # is 0 / 0 and the whole column becomes NaN.
    col_range = col_max - col_min
    if i in negative_indicators:
        data[i] = (col_max - data[i]) / col_range
    else:
        data[i] = (data[i] - col_min) / col_range
# Column-wise proportions: each entry divided by the total of its column.
# Entries that come out exactly 0 are replaced by 1e-6 so that the logarithm
# taken below stays finite.
#
# The work is done on a float array in one pass instead of per-element
# ``apply`` calls and scalar ``.iloc[j, i]`` lookups inside nested loops.
values = data.to_numpy(dtype=float)
proportions = values / values.sum(axis=0)
proportions[proportions == 0] = 1e-6
data_p = pd.DataFrame(proportions, index=data.index, columns=data.columns)

# Entropy of each column: h = -k * sum(p * ln(p)), where m is the number of
# rows and k = 1 / ln(m).
# NOTE: with a single row ln(m) is 0, so k (and every entropy) is not finite.
m = len(data)
k = 1 / np.log(m)

# One entropy value per column, in the order of ``data.columns``.
data_h = list(-k * (proportions * np.log(proportions)).sum(axis=0))
# Difference coefficient of each feature: 1 - entropy, one row per entry of
# ``data_h``.  The column is labelled explicitly because the weight
# computation below looks it up by this name; without ``columns=`` the
# frame's only column would be labelled 0 and the lookup would raise KeyError.
data_c = pd.DataFrame(1 - np.array(data_h), columns=['各特征差异系数'])

# Weight of each feature: its difference coefficient divided by the sum of
# all coefficients.  A single vectorised division replaces the element-wise
# chained assignment (``data_w[col][i] = ...``), which pandas does not
# guarantee to write back into ``data_w``.
# ``skipna=False`` keeps NaN propagating into the total, as the builtin
# ``sum`` did.
total = data_c['各特征差异系数'].sum(skipna=False)
data_w = data_c / total
data_w
|