From ff0a416e2663df66a146056fda74f78b78fd3ca1 Mon Sep 17 00:00:00 2001
From: Hannes Kuchelmeister <hannes@kuchelmeister.org>
Date: Tue, 18 May 2021 15:28:16 +0200
Subject: [PATCH] add political compass for Germany including plotting

---
 german_analysis.py              |  8 +++++++-
 voting_lib/political_compass.py | 36 +++++++++++++++++++++++++++++++++
 voting_lib/voting_analysis.py   | 30 ++++++++++++++++++++++++---
 3 files changed, 70 insertions(+), 4 deletions(-)
 create mode 100644 voting_lib/political_compass.py

diff --git a/german_analysis.py b/german_analysis.py
index a363452..0858b8e 100755
--- a/german_analysis.py
+++ b/german_analysis.py
@@ -3,6 +3,7 @@
 
 import voting_lib.load_data as ld
 import voting_lib.voting_analysis as va
+import voting_lib.political_compass as pc
 import numpy as np
 
 # Training Paramters
@@ -16,6 +17,9 @@ ep = 300          # No of epochs
 # Load data
 dataset = ld.load_german_data()
 
+years = [2017, 2013, 2009]
+
+i = 0
 for period, df in dataset.items():
 
     print("Election Period ", period)
@@ -27,4 +31,6 @@ for period, df in dataset.items():
     model = va.train_model(X, grid_h, grid_w, radius, step, ep)
 
     # Predict and visualize output
-    va.predict(model, data, grid_h, grid_w)
\ No newline at end of file
+    va.predict(model, data, grid_h, grid_w, pc.get_compass_parties(year=years[i], country='de'))
+    i += 1
+
diff --git a/voting_lib/political_compass.py b/voting_lib/political_compass.py
new file mode 100644
index 0000000..9271f27
--- /dev/null
+++ b/voting_lib/political_compass.py
@@ -0,0 +1,36 @@
+import pandas as pd
+
+def get_compass_parties(year=2017, country='de'):
+    """Return political-compass coordinates of the parties of one election.
+
+    Args:
+        year: election year to look up.
+        country: country code; 'de' and 'uk' are known.
+
+    Returns:
+        DataFrame indexed by party name with two unnamed coordinate columns;
+        empty for the 'uk' years, whose data is still missing.
+
+    Raises:
+        ValueError: if there is no data for ``country`` or for ``year``.
+    """
+    compass = {
+        'de': {
+            2017: {'AfD': [5.5, 8], 'BÜ90/GR': [2, 4.5], 'CDU/CSU': [6, 6.25],
+                   'DIE LINKE.': [-2.5, -1.5], 'FDP': [7, 3], 'SPD': [3, 2.5]},
+            2013: {'BÜ90/GR': [-3.5, -4], 'CDU/CSU': [7, 6.5],
+                   'DIE LINKE.': [-7, -6.5], 'SPD': [1, 2]},
+            # TODO(review): original note read "add data for 2011"; confirm year.
+            2009: {'BÜ90/GR': [-1.5, -1.5], 'CDU/CSU': [9.5, 8],
+                   'DIE LINKE.': [-6, -2], 'SPD': [3, 3.5]},
+        },
+        # TODO: add data
+        'uk': {2017: {}, 2015: {}, 2011: {}},
+    }
+    if country not in compass:
+        raise ValueError("No data for " + country)
+    if year not in compass[country]:
+        raise ValueError("Year " + str(year) + " does not exist for " + country)
+
+    parties = compass[country][year]
+    return pd.DataFrame(data=list(parties.values()), index=list(parties))
\ No newline at end of file
diff --git a/voting_lib/voting_analysis.py b/voting_lib/voting_analysis.py
index d00694e..41b81a6 100644
--- a/voting_lib/voting_analysis.py
+++ b/voting_lib/voting_analysis.py
@@ -26,7 +26,7 @@ def train_model(X, grid_h, grid_w, radius, step, ep):
     sofmnet.train(X, epochs=ep)
     return sofmnet
 
-def predict(model, data, grid_h, grid_w):
+def predict(model, data, grid_h, grid_w, comparison_data=pd.DataFrame()):
 
     X = data[:,2:]
     
@@ -45,6 +45,8 @@ def predict(model, data, grid_h, grid_w):
     # calculating party positions based on mps
     party_pos = calc_party_pos(np.column_stack((xs, ys)), party_affiliation)
 
+    print(party_pos)
+
     # Plot node distnaces
     plt.figure()
     weight = model.weight.reshape((model.n_inputs, grid_h, grid_w))
@@ -66,6 +68,22 @@ def predict(model, data, grid_h, grid_w):
     plot_party_distances(part_distance_out)
     plt.show()
 
+    if comparison_data is not None and not comparison_data.empty:
+        plot_parties(comparison_data, randomize_positions=False, new_plot=True)
+        plt.title("political compass")
+        plt.ylabel("libertarian - authoritarian")
+        plt.xlabel("left < economic > right")
+        plt.show()
+        comparison_data_dist = calc_party_distances(comparison_data)
+        plot_party_distances(comparison_data_dist)
+        plt.show()
+        # Rescale both distance matrices with normalize_df before comparing.
+        err = (normalize_df(part_distance_out) - normalize_df(comparison_data_dist)) ** 2
+        plot_party_distances(err)
+        # Format the float itself: ".2" applied to str(...) cut it to 2 chars.
+        plt.title(f'distance squared error, with mse={np.nanmean(err.to_numpy()):.2g}')
+        plt.show()
+
     #  # plotting party distances in input space (TODO discard)
     # party_pos_out = calc_party_pos(X, party_affiliation)
     # part_distance_in = calc_party_distances(party_pos_out)
@@ -204,8 +222,7 @@ def plot_parties(parties, randomize_positions=False, new_plot=True):
         print("Party ", party, " x = ", xs_disp[i], "y = ", ys_disp[i])
         plt.scatter(xs_disp[i], ys_disp[i], label=party, zorder=2)
         
-    plt.legend(title='Parties',bbox_to_anchor=(1.3, 1), loc='upper left')
-    
+    plt.legend(title='Parties')    
 def calc_party_distances(parties):
     distances = np.zeros((parties.shape[0], parties.shape[0]))
     for i, (_, left_party) in enumerate(parties.iterrows()):
@@ -220,3 +237,10 @@ def plot_party_distances(distances):
     ax = plt.gca()
     ax.tick_params(axis="x", bottom=False, top=True, labelbottom=False, labeltop=True)
     sn.heatmap(distances, cmap='Oranges', annot=True)
+
+
+def normalize_df(dataframe):
+    """Return a copy shifted to a minimum of 0, then divided by its maximum."""
+    # Arithmetic on the frame allocates a new one, so the input is untouched.
+    shifted = dataframe - np.min(dataframe.to_numpy())
+    return shifted / np.max(shifted.to_numpy())