UU_NCML_Project/voting_lib/voting_analysis.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import numpy as np
from neupy import algorithms
from itertools import product
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sn
import re
from matplotlib.colors import ListedColormap

def train_model(X, grid_h, grid_w, radius, step, ep):
    """Create a self-organising feature map for ``X`` and train it.

    Args:
        X: Training matrix; ``X.shape[1]`` (the number of features, i.e.
            bills) becomes the network's ``n_inputs``.
        grid_h: First entry of the ``features_grid`` tuple.
        grid_w: Second entry of the ``features_grid`` tuple.
        radius: Forwarded to the network as ``learning_radius``.
        step: Forwarded to the network as ``step``.
        ep: Number of epochs passed to ``train``.

    Returns:
        The ``algorithms.SOFM`` instance after training.
    """
    sofm_settings = dict(
        n_inputs=X.shape[1],
        step=step,
        show_epoch=100,
        shuffle_data=True,
        verbose=True,
        learning_radius=radius,
        features_grid=(grid_h, grid_w),
    )

    network = algorithms.SOFM(**sofm_settings)
    network.train(X, epochs=ep)
    return network

def predict(model, data, grid_h, grid_w, party_colors, comparison_data=pd.DataFrame()):
    """Run ``model`` on ``data`` and draw the resulting diagnostic figures.

    Column 0 of ``data`` is passed on as the member names, column 1 as the
    party affiliation, and columns 2 onward are fed to ``model.predict``.
    The function then shows, in order: a hit map, a scatter plot of the
    members, a node-distance heatmap overlaid with the mean party positions,
    and a party-to-party distance matrix. When ``comparison_data`` is not
    empty it additionally plots that data, its distance matrix, and the
    squared difference between the two normalised distance matrices.

    Args:
        model: Object exposing ``predict``, ``weight`` and ``n_inputs``.
        data: 2-D array indexed as described above.
        grid_h: Grid height, used as the first dimension when unravelling
            neuron indices and reshaping the weights.
        grid_w: Grid width, used as the second dimension.
        party_colors: Mapping from party label to colour, forwarded to the
            plotting helpers.
        comparison_data: Optional DataFrame of reference party positions.
            NOTE(review): the default ``pd.DataFrame()`` is created once at
            definition time and shared between calls; it is only read here.

    Returns:
        None. The function prints intermediate results, sets the global
        ``figure.autolayout`` rcParam and blocks on ``plt.show()``.
    """
    # default tight layout
    plt.rcParams["figure.autolayout"] = True

    # everything after the name and party columns is model input
    X = data[:,2:]

    # predicting mp positions
    prediction = model.predict(X)
    print(f'prediction: {prediction}')

    # Plot hit map
    plot_hits(prediction, grid_w, grid_h)
    plt.title("Hitmap")

    # converting to x and y coordinates: the per-row argmax is the index of
    # the winning output, unravelled on a (grid_h, grid_w) grid
    ys, xs = np.unravel_index(np.argmax(prediction, axis=1), (grid_h, grid_w))

    # plotting mps
    party_affiliation = data[:,1]
    plot_mps(data[:,0], xs, ys, party_affiliation, party_colors, randomize_positions=True)
    plt.title("Members of Parliament")
    plt.show()

    # calculating party positions based on mps
    party_pos = calc_party_pos(np.column_stack((xs, ys)), party_affiliation)

    print(party_pos)

    # Plot node distances
    plt.figure()
    weight = model.weight.reshape((model.n_inputs, grid_h, grid_w))
    heatmap = compute_heatmap(weight, grid_h, grid_w)
    plt.imshow(heatmap, cmap ='Blues', interpolation='nearest',zorder=1, alpha=0.5)
    plt.axis('off')
    plt.colorbar()

    # plotting parties on top of the heatmap (new_plot=False reuses the figure)
    plot_parties(party_pos, party_colors, randomize_positions=False, new_plot=False)
    plt.title('Node distance plot with parties')

    # plotting party distances in output space
    part_distance_out = calc_party_distances(party_pos)
    plot_party_distances(part_distance_out)
    plt.title('Party Distances')
    plt.show()

    # optional comparison against externally supplied party positions
    if not comparison_data.empty:
       plot_parties(comparison_data, party_colors, randomize_positions=False, new_plot=True)
       plt.title("Political Compass")
       plt.ylabel("libertarian - authoritarian")
       plt.xlabel("left < economic > right")

       comparison_data_dist = calc_party_distances(comparison_data)
       plot_party_distances(comparison_data_dist)
       plt.title("Political Compass Party Distances")

       # element-wise squared difference of the two normalised distance
       # matrices; rows/columns that are entirely NaN are dropped first
       err = remove_NaN_rows_columns(normalize_df(part_distance_out) - normalize_df(comparison_data_dist))
       err = err * err
       plot_party_distances(err)
       plt.title(f'Normalized Distance Squared Error, with MSE={np.nanmean(err.to_numpy()):.2f}')
       plt.show()

def iter_neighbours(weights, hexagon=False):
    """Yield every grid cell together with its in-bounds neighbours.

    Args:
        weights: Array whose shape unpacks as ``(_, grid_height, grid_width)``;
            only the shape is read.
        hexagon: When truthy, use six offsets per cell, with the extra two
            depending on whether the cell's first coordinate is odd or even.
            Otherwise use the four axis-aligned offsets.

    Yields:
        ``((x, y), neighbours)`` where ``neighbours`` is a list of ``(x, y)``
        tuples that fall inside the grid, in offset order.
    """
    _, n_rows, n_cols = weights.shape

    cross = ((-1, 0), (0, -1), (1, 0), (0, 1))
    # Keyed by the parity of the first coordinate (1 = odd, 0 = even).
    hexagon_offsets = {
        1: cross + ((1, 1), (-1, 1)),
        0: cross + ((-1, -1), (1, -1)),
    }

    for row, col in product(range(n_rows), range(n_cols)):
        offsets = hexagon_offsets[row % 2] if hexagon else cross

        reachable = [
            (row + d_row, col + d_col)
            for d_row, d_col in offsets
            if 0 <= row + d_row < n_rows and 0 <= col + d_col < n_cols
        ]

        yield (row, col), reachable

def compute_heatmap(weight, grid_height, grid_width):
    """Return the mean distance from each neuron to its grid neighbours.

    Args:
        weight: Array indexed as ``weight[:, x, y]`` to obtain the weight
            vector of the neuron at grid position ``(x, y)``.
        grid_height: Number of rows of the returned heatmap.
        grid_width: Number of columns of the returned heatmap.

    Returns:
        ``(grid_height, grid_width)`` float array. Each cell holds the
        average Euclidean distance between that neuron's weight vector and
        those of its rectangular neighbours. A neuron without neighbours
        (a 1x1 grid) keeps the value 0 instead of raising
        ``ZeroDivisionError``.
    """
    heatmap = np.zeros((grid_height, grid_width))

    for (neuron_x, neuron_y), neighbours in iter_neighbours(weight):
        if not neighbours:
            # Nothing to average over; leave the pre-filled zero in place.
            continue

        # The neuron's own vector does not change across its neighbours.
        neuron_vec = weight[:, neuron_x, neuron_y]
        total_distance = sum(
            np.linalg.norm(neuron_vec - weight[:, neigbour_x, neigbour_y])
            for neigbour_x, neigbour_y in neighbours
        )

        heatmap[neuron_x, neuron_y] = total_distance / len(neighbours)

    return heatmap

def plot_hoverscatter(x, y, categories, hover_labels, colors, cmap = plt.cm.RdYlGn):
    """Draw a scatter plot whose points show a text box while hovered.

    Args:
        x: X coordinates of the points.
        y: Y coordinates of the points.
        categories: Sequence indexed by the number found in each legend
            label to obtain the legend text.
        hover_labels: Sequence indexed by point position; the entry is shown
            in the hover box.
        colors: Per-point values passed to ``plt.scatter`` as ``c``.
        cmap: Colormap passed to ``plt.scatter``.

    The function creates a new figure, prints the first legend label and
    registers a ``motion_notify_event`` callback on the figure canvas.
    """
    ANNOTATION_DISTANCE = 5
    TRANSPARENCY = 0.8

    fig, ax = plt.subplots()
    scatterplot = plt.scatter(x, y, c=colors, s=5, cmap=cmap)

    handles, labels = scatterplot.legend_elements(prop="colors", alpha=0.6)
    print(labels[0])
    # Strip every non-digit from a legend label and use what is left as an
    # index into ``categories``.
    legend_names = [
        categories[int(re.sub(r'([^\d]+)', "", label))] for label in labels
    ]
    ax.legend(handles, legend_names, bbox_to_anchor=(1.3, 1), loc='upper left')

    tooltip = ax.annotate(
        "",
        xy=(0, 0),
        xytext=(ANNOTATION_DISTANCE, ANNOTATION_DISTANCE),
        textcoords="offset points",
        bbox=dict(boxstyle="Square"),
    )
    tooltip.set_visible(False)

    def on_motion(event):
        # Show the box for the first point under the cursor, hide it otherwise.
        was_visible = tooltip.get_visible()
        if event.inaxes != ax:
            return

        hit, details = scatterplot.contains(event)
        if hit:
            point = details["ind"][0]
            tooltip.xy = scatterplot.get_offsets()[point]
            tooltip.set_text(f'{hover_labels[point]}')
            tooltip.get_bbox_patch().set_alpha(TRANSPARENCY)
            tooltip.set_visible(True)
            fig.canvas.draw_idle()
        elif was_visible:
            tooltip.set_visible(False)
            fig.canvas.draw_idle()

    fig.canvas.mpl_connect("motion_notify_event", on_motion)

def plot_mps(names, xs, ys, party_affiliation, party_colors, randomize_positions=True):
    """Scatter-plot members of parliament, coloured by party.

    Args:
        names: Array of member names; combined with the party label to form
            the hover text ``"<name> (<party>)"``.
        xs: Array of x positions.
        ys: Array of y positions.
        party_affiliation: Array of party labels, one per member.
        party_colors: Mapping from party label to colour.
        randomize_positions: When true, shift every point by a uniform
            random offset in ``[-0.5, 0.5)`` on both axes so that members
            sharing a position stay visible.
    """
    # Unique party labels plus, for every member, the index of its label.
    party_names, party_codes = np.unique(party_affiliation, return_inverse=True)

    xs_shown, ys_shown = xs, ys
    if randomize_positions:
        xs_shown = xs + np.random.rand(xs.shape[0]) - 0.5
        ys_shown = ys + np.random.rand(ys.shape[0]) - 0.5

    palette = ListedColormap([party_colors[name] for name in party_names])
    captions = names + " (" + party_names[party_codes] + ")"

    plot_hoverscatter(xs_shown, ys_shown, party_names, captions, party_codes, cmap=palette)

def calc_party_pos(members_of_parliament, party_affiliation):
    """Average the member positions of each party.

    Args:
        members_of_parliament: 2-D array with one row of coordinates per
            member.
        party_affiliation: Array of party labels, one per member.

    Returns:
        DataFrame indexed by the sorted unique party labels, holding the
        per-party mean of every coordinate column.
    """
    labels, member_to_party = np.unique(party_affiliation, return_inverse=True)

    table_shape = (labels.shape[0], members_of_parliament.shape[1])
    coordinate_sums = np.zeros(table_shape)
    member_counts = np.zeros(table_shape)

    for row in range(members_of_parliament.shape[0]):
        slot = member_to_party[row]
        coordinate_sums[slot] = coordinate_sums[slot] + members_of_parliament[row]
        member_counts[slot] += 1

    return pd.DataFrame(data=coordinate_sums / member_counts, index=labels)

def plot_parties(parties, party_colors, randomize_positions=False, new_plot=True):
    """Scatter-plot one labelled point per party.

    Args:
        parties: DataFrame indexed by party label; column ``0`` holds the x
            coordinate and column ``1`` the y coordinate.
        party_colors: Mapping from party label to colour.
        randomize_positions: When true, shift every point by a uniform
            random offset in ``[-0.5, 0.5)`` on both axes.
        new_plot: When true, open a new figure first; otherwise draw onto
            the current one.

    The position of every party is also printed.
    """
    party_index_mapping = parties.index
    colors = [party_colors[party] for party in party_index_mapping]

    if new_plot:
        plt.figure()

    xs_disp = parties[0].to_numpy()
    ys_disp = parties[1].to_numpy()

    if randomize_positions:
        # Jitter x and y independently. The y values must start from
        # column 1; starting from column 0 would collapse every party onto
        # the diagonal.
        xs_disp = xs_disp + np.random.rand(parties.shape[0]) - 0.5
        ys_disp = ys_disp + np.random.rand(parties.shape[0]) - 0.5

    for i, party in enumerate(party_index_mapping):
        print("Party ", party, " x = ", xs_disp[i], "y = ", ys_disp[i])
        # zorder=2 keeps the markers above an underlying image (zorder=1).
        plt.scatter(xs_disp[i], ys_disp[i], label=party, zorder=2, c=colors[i], edgecolors='black')

    plt.legend(title='Parties', bbox_to_anchor=(1.3, 1), loc='upper left')

def calc_party_distances(parties):
    """Compute the pairwise Euclidean distances between party positions.

    Args:
        parties: DataFrame with one row of coordinates per party.

    Returns:
        Square DataFrame whose index and columns are both ``parties.index``
        and whose cell ``(a, b)`` is the distance between rows ``a`` and
        ``b``.
    """
    labels = parties.index
    positions = parties.to_numpy()

    distances = np.zeros((parties.shape[0], parties.shape[0]))
    for row, origin in enumerate(positions):
        distances[row] = [np.linalg.norm(origin - target) for target in positions]

    return pd.DataFrame(data=distances, index=labels, columns=labels)

def plot_party_distances(distances):
    """Show ``distances`` as an annotated heatmap in a new figure.

    The x tick labels are moved from the bottom to the top of the axes.
    """
    axes = plt.figure().gca()

    # Ticks and labels on top only.
    top_only = dict(bottom=False, top=True, labelbottom=False, labeltop=True)
    axes.tick_params(axis="x", **top_only)

    sn.heatmap(distances, cmap='Oranges', annot=True, ax=axes)

def plot_hits(prediction, grid_w, grid_h):
    """Draw how many samples were assigned to each grid node.

    Args:
        prediction: 2-D array with one row per sample and one column per
            grid node; the columns are summed over all samples.
        grid_w: Grid width (number of heatmap columns).
        grid_h: Grid height (number of heatmap rows).

    The heatmap is drawn in a new figure.
    """
    # Lay the flat node index out as (height, width), the same convention
    # used when unravelling winner indices and reshaping the weights.
    # Reshaping to (width, height) scrambles the map on non-square grids.
    hits = prediction.sum(axis=0).reshape(grid_h, grid_w)
    plt.figure()
    sn.heatmap(hits, annot=True, xticklabels=False, yticklabels=False, cbar=False)

def normalize_df(dataframe):
    """Return a copy of ``dataframe`` rescaled to the range ``[0, 1]``.

    The global minimum over all cells is subtracted and the result is
    divided by the global maximum of the shifted values. NaN cells are
    ignored when finding the minimum and maximum and stay NaN in the
    output, so one missing value no longer blanks the whole frame.

    Args:
        dataframe: Numeric DataFrame; it is not modified.

    Returns:
        A new DataFrame. An empty or all-NaN frame is returned as an
        unchanged copy. A frame whose non-NaN cells are all equal is
        returned shifted to zero rather than divided by zero.
    """
    df = dataframe.copy(deep=True)

    values = df.to_numpy()
    if values.size == 0 or pd.isna(values).all():
        # No finite value to anchor the scale on.
        return df

    df = df - np.nanmin(values)

    span = np.nanmax(df.to_numpy())
    if span == 0:
        # Constant input: every cell is already 0 after the shift.
        return df

    return df / span

def remove_NaN_rows_columns(dataframe):
    """Drop every row and every column that consists only of NaN.

    Rows are removed first, then columns of the remaining frame.

    Args:
        dataframe: DataFrame to clean; it is not modified.

    Returns:
        A new DataFrame without all-NaN rows and columns.
    """
    # Only ``how`` is passed: current pandas raises TypeError when ``how``
    # and ``thresh`` are both given, even if ``thresh`` is None.
    without_empty_rows = dataframe.dropna(axis=0, how='all')
    return without_empty_rows.dropna(axis=1, how='all')