|
- import numpy as np
-
-
def Mutual_Info(x, C):
    """Return the mutual information I(x; C) of two discrete sequences.

    Both inputs are treated as categorical: every distinct value is one
    category.  Probabilities are estimated from the empirical frequencies
    and the logarithm is base 10, so the result is::

        sum_{a, c} p(a, c) * log10(p(a, c) / (p(a) * p(c)))

    taken over the (a, c) pairs that actually occur.

    Args:
        x: Sequence of observations (flattened to 1-D).
        C: Sequence of observations paired element-wise with ``x``.

    Returns:
        The mutual information as a float; ``0.0`` for empty input.

    Raises:
        ValueError: If ``x`` and ``C`` do not have the same number of
            elements.
    """
    x = np.asarray(x).ravel()
    C = np.asarray(C).ravel()
    if x.shape[0] != C.shape[0]:
        raise ValueError(
            "x and C must have the same length, got %d and %d"
            % (x.shape[0], C.shape[0])
        )

    n_samples = x.shape[0]
    if n_samples == 0:
        return 0.0

    # return_inverse yields, per sample, the index of its category, which
    # replaces a linear np.where search for every single observation.
    catg_x, x_codes, num_x = np.unique(
        x, return_inverse=True, return_counts=True)
    catg_C, C_codes, num_C = np.unique(
        C, return_inverse=True, return_counts=True)
    prob_x = num_x / float(n_samples)
    prob_C = num_C / float(n_samples)

    # Contingency table of co-occurrence counts.  np.add.at accumulates
    # correctly when the same (x, C) cell is hit more than once.
    joint = np.zeros((len(catg_x), len(catg_C)))
    np.add.at(joint, (x_codes, C_codes), 1)
    joint /= n_samples

    # Cells with zero joint probability contribute nothing and would
    # otherwise produce log10(0).
    independent = np.outer(prob_x, prob_C)
    observed = joint > 0
    return float(
        np.sum(joint[observed]
               * np.log10(joint[observed] / independent[observed]))
    )
-
-
def Muliti_Mutual_Info(x, y, C):
    """Return the joint mutual information I((x, y); C).

    The pair ``(x, y)`` is treated as a single discrete variable and
    compared against ``C``.  Probabilities are empirical frequencies and
    the logarithm is base 10, so the result is::

        sum_{a, b, c} p(a, b, c) * log10(p(a, b, c) / (p(c) * p(a, b)))

    taken over the (a, b, c) triples that actually occur.

    Args:
        x: First feature sequence (flattened to 1-D).
        y: Second feature sequence, paired element-wise with ``x``.
        C: Label sequence, paired element-wise with ``x`` and ``y``.

    Returns:
        The joint mutual information as a float; ``0.0`` for empty input.

    Raises:
        ValueError: If the three inputs do not have the same number of
            elements.
    """
    x = np.asarray(x).ravel()
    y = np.asarray(y).ravel()
    C = np.asarray(C).ravel()
    n_samples = x.shape[0]
    if y.shape[0] != n_samples or C.shape[0] != n_samples:
        raise ValueError(
            "x, y and C must have the same length, got %d, %d and %d"
            % (x.shape[0], y.shape[0], C.shape[0])
        )
    if n_samples == 0:
        return 0.0

    # Per-sample category indices replace a linear search per observation.
    catg_x, x_codes = np.unique(x, return_inverse=True)
    catg_y, y_codes = np.unique(y, return_inverse=True)
    catg_C, C_codes, num_C = np.unique(
        C, return_inverse=True, return_counts=True)
    prob_C = num_C / float(n_samples)

    # 3-D contingency table of (x, y, C) co-occurrence counts.
    counts_xyC = np.zeros((len(catg_x), len(catg_y), len(catg_C)))
    np.add.at(counts_xyC, (x_codes, y_codes, C_codes), 1)

    # The (x, y) table is the 3-D table summed over the label axis, so no
    # second pass over the data is needed.
    prob_xyC = counts_xyC / n_samples
    prob_xy = counts_xyC.sum(axis=2) / n_samples

    expected = prob_xy[:, :, np.newaxis] * prob_C[np.newaxis, np.newaxis, :]
    # Skip empty cells: they contribute nothing and would hit log10(0).
    observed = prob_xyC > 0
    return float(
        np.sum(prob_xyC[observed]
               * np.log10(prob_xyC[observed] / expected[observed]))
    )
-
-
def Sum_ICI(X, C, cur_featind, y):
    """Sum the pairwise interaction terms between ``y`` and selected features.

    For every column index ``j`` in ``cur_featind`` the term::

        2 * I((X[:, j], y); C) - I(X[:, j]; C) - I(y; C)

    is accumulated, using ``Muliti_Mutual_Info`` for the joint term and
    ``Mutual_Info`` for the single-feature terms.

    Args:
        X: 2-D array indexed as ``X[:, j]`` to obtain feature column ``j``.
        C: Label sequence passed through to the mutual-information helpers.
        cur_featind: Indices of the already selected feature columns.
        y: Candidate feature column compared against each selected feature.

    Returns:
        The accumulated sum; ``0`` when ``cur_featind`` is empty.
    """
    if len(cur_featind) == 0:
        return 0

    # I(y; C) does not depend on the selected feature, so compute it once
    # instead of once per loop iteration.
    rel_y = Mutual_Info(y, C)

    total = 0  # avoids shadowing the builtin ``sum``
    for ith_feat in cur_featind:
        xi = X[:, ith_feat]
        total += 2 * Muliti_Mutual_Info(xi, y, C) - Mutual_Info(xi, C) - rel_y
    return total
-
-
def mRMR_sel(X, cur_featind, rel_array, red_array):
    """Append the unselected feature with the highest mRMR score.

    For each column ``i`` not yet in ``cur_featind`` the mutual information
    between column ``i`` and the most recently selected column is added to
    ``red_array[i]``, and the score is::

        rel_array[i] - red_array[i] / len(cur_featind)

    Only the last selected feature is evaluated on each call; redundancy
    with earlier selections is expected to be carried in ``red_array``.

    Args:
        X: 2-D array; features are columns (``X[:, i]``).
        cur_featind: Non-empty list of selected column indices.  Modified
            in place.
        rel_array: Per-feature relevance values, read only.
        red_array: Per-feature accumulated redundancy.  Modified in place
            for every unselected feature.

    Returns:
        ``cur_featind`` with the chosen index appended, or unchanged when
        every feature is already selected.
    """
    num_feat = X.shape[1]
    num_sel_feat = len(cur_featind)
    # Only the most recently appended feature is compared on this call.
    xj = X[:, cur_featind[-1]]

    selected = set(cur_featind)
    candidates = [i for i in range(num_feat) if i not in selected]
    if not candidates:
        # Without this guard every score stays -inf and an already selected
        # index would be appended a second time.
        return cur_featind

    mRMR_array = np.full(num_feat, -np.inf)
    for ith_feat in candidates:
        xi = X[:, ith_feat]
        red_array[ith_feat] += Mutual_Info(xi, xj)  # record redundancy
        mRMR_array[ith_feat] = (
            rel_array[ith_feat] - red_array[ith_feat] / num_sel_feat
        )

    # argsort()[-1] (not argmax) keeps the original tie-breaking behaviour.
    max_index = int(np.argsort(mRMR_array)[-1])
    cur_featind.append(max_index)
    return cur_featind
-
-
def MaxRel_sel(X, C, cur_featind, rel_array):
    """Append the unselected feature with the highest relevance score.

    For each column ``i`` not yet in ``cur_featind`` the mutual information
    between column ``i`` and ``C`` is added to ``rel_array[i]``, and the
    score is::

        rel_array[i] / (len(cur_featind) + 1)

    Args:
        X: 2-D array; features are columns (``X[:, i]``).
        C: Label sequence paired with the rows of ``X``.
        cur_featind: List of selected column indices.  Modified in place.
        rel_array: Per-feature accumulated relevance.  Modified in place
            for every unselected feature.

    Returns:
        ``cur_featind`` with the chosen index appended, or unchanged when
        every feature is already selected.
    """
    num_feat = X.shape[1]
    num_sel_feat = len(cur_featind)

    selected = set(cur_featind)
    candidates = [i for i in range(num_feat) if i not in selected]
    if not candidates:
        # Without this guard every score stays -inf and an already selected
        # index would be appended a second time.
        return cur_featind

    MaxRel_array = np.full(num_feat, -np.inf)
    for ith_feat in candidates:
        xi = X[:, ith_feat]
        rel_array[ith_feat] += Mutual_Info(xi, C)  # record relevance
        MaxRel_array[ith_feat] = rel_array[ith_feat] / (num_sel_feat + 1)

    # argsort()[-1] (not argmax) keeps the original tie-breaking behaviour.
    max_index = int(np.argsort(MaxRel_array)[-1])
    cur_featind.append(max_index)
    return cur_featind
-
-
def MRI_sel(X, C, cur_featind):
    """Append the unselected feature with the highest MRI score.

    For each column ``k`` not yet in ``cur_featind`` the score is::

        Mutual_Info(X[:, k], C) + Sum_ICI(X, C, cur_featind, X[:, k])

    Args:
        X: 2-D array; features are columns (``X[:, k]``).
        C: Label sequence paired with the rows of ``X``.
        cur_featind: List of selected column indices.  Modified in place.

    Returns:
        ``cur_featind`` with the chosen index appended, or unchanged when
        every feature is already selected.
    """
    num_feat = X.shape[1]

    selected = set(cur_featind)
    candidates = [k for k in range(num_feat) if k not in selected]
    if not candidates:
        # Without this guard every score stays -inf and an already selected
        # index would be appended a second time.
        return cur_featind

    MRI_array = np.full(num_feat, -np.inf)
    for ith_feat in candidates:
        xk = X[:, ith_feat]
        MRI_array[ith_feat] = (
            Mutual_Info(xk, C) + Sum_ICI(X, C, cur_featind, xk)
        )

    # argsort()[-1] (not argmax) keeps the original tie-breaking behaviour.
    max_index = int(np.argsort(MRI_array)[-1])
    cur_featind.append(max_index)
    return cur_featind
|