import sys import os import datetime import pandas as pd import multiprocessing import itertools sys.path.append("./src") sys.path.append("./evaluation") from model.product_structure_model import ProductStructureModel from model.preferences_model import Preferences from model.configuration_model import ConfigurationModel from managers.recommendation_manager import SimpleConfigurationMaxSelector from scoring.scoring_functions import ReduceScoringFunctionFactory from user_type_mappings import TYPE_ATHLETE, TYPE_CONSUMER, TYPE_ENVIRONMENTALIST, TYPE_OWNER, TYPE_RANDOM import operator import time import numpy as np import matplotlib.pyplot as pp import random import math import json with open('./evaluation/product_structure.json') as json_file: data = json.load(json_file) product_structure = ProductStructureModel(data) from tinydb import TinyDB def DB(): return TinyDB('eval.json') def DB_CONFIG(): return DB().table('CONFIG') def DB_PRODUCT_STRUCTURE(): return DB().table('PRODUCT_STRUCTURE') CONFIGURATIONS_UNFINISHED = [] PREFERENCES_RANDOM_MEMBER = [] PREFERENCES_ALL = [] def generate_group_preferences(user_type_mappings, amount = 1000): global PREFERENCES_RANDOM_MEMBER global PREFERENCES_ALL characteristics = product_structure.get_list_of_characteristics() PREFERENCES_ALL = [] PREFERENCES_RANDOM_MEMBER = [] for i in range(amount): users = [] single_user = [] counter = random.randint(0, len(user_type_mappings) - 1) for mapping in user_type_mappings: ratings = [] for char in characteristics: value = mapping[char.elementId].generateNumber() ratings.append({ "code": char.elementId, "value": value, }) user = { "user": mapping['name'], "ratings": ratings, } users.append(user) if counter == 0: single_user.append(user) counter -= 1 PREFERENCES_ALL.append( Preferences({'preferences' : users}) ) PREFERENCES_RANDOM_MEMBER.append( Preferences({'preferences' : single_user}) ) return PREFERENCES_ALL def generate_unfinished_configurations(fullness=0.3, amount=1000): configurations = TinyDB('./evaluation/eval.json').table('CONFIG').all() global CONFIGURATIONS_UNFINISHED characteristics = list(map(lambda x: x.elementId,ProductStructureModel(data).get_list_of_characteristics())) CONFIGURATIONS_UNFINISHED = [] for i in range(amount): final_config = configurations[random.randint(0, len(configurations) - 1)] codes = list(filter(lambda x: x in characteristics, final_config['configuration'])) conf_size = math.ceil(len(codes) * fullness) unfishied_config = random.sample(codes, conf_size) CONFIGURATIONS_UNFINISHED.append(ConfigurationModel({ "configuration": unfishied_config, "variables": [] })) return CONFIGURATIONS_UNFINISHED def get_ratings(requests, finished_configurations, product_structure, scoring_function=None): if scoring_function == None : scoring_function = ReduceScoringFunctionFactory.build_scoring_function( ["penalty_ratio", "pref_product_simpleSelectedCharacterstics_average"], #["pref_average_flat"], product_structure, oper = operator.mul ) list_ofScoreLists = [] for (preference, config) in requests: list_ofScoreLists.append(list(map(lambda to_rate: scoring_function.calc_score(config, preference, to_rate), finished_configurations))) return list_ofScoreLists def plot_at_y(arr, val): pp.plot(arr, np.zeros_like(arr) + val, 'x') def get_scores_for_one(configurationState, preference, finished_configurations, product_structure, scoring_function=None): if scoring_function == None: scoring_function = ReduceScoringFunctionFactory.build_scoring_function( ["penalty_ratio", "pref_product_simpleSelectedCharacterstics_average"], product_structure, oper = operator.mul ) return list(map(lambda to_rate: scoring_function.calc_score(configurationState, preference, to_rate), finished_configurations)) def get_scoring_functions(): product = ReduceScoringFunctionFactory.build_scoring_function( ["penalty_ratio", "pref_product_simpleSelectedCharacterstics_average"], product_structure, oper = operator.mul) misery = ReduceScoringFunctionFactory.build_scoring_function( ["penalty_ratio", "pref_min_simpleSelectedCharacterstics_average"], product_structure, oper = operator.mul) average = ReduceScoringFunctionFactory.build_scoring_function( ["penalty_ratio", "pref_average_simpleSelectedCharacterstics_average"], product_structure, oper = operator.mul) return [("multiplication",product), ("least misery", misery), ("best average", average)] def main(amount=1000, fullness=0.1, center=50, threshold_distance_from_centre = 0, group_type='heterogeneous', outdir="./out"): global CONFIGURATIONS_UNFINISHED global PREFERENCES_RANDOM_MEMBER global PREFERENCES_ALL print("Started Evaluation") if group_type == 'homogenous': group_type_mappings = [TYPE_OWNER, TYPE_OWNER, TYPE_OWNER, TYPE_OWNER] elif group_type == 'random': group_type_mappings = [TYPE_RANDOM, TYPE_RANDOM, TYPE_RANDOM, TYPE_RANDOM] else: group_type='heterogeneous' group_type_mappings = [TYPE_ATHLETE, TYPE_CONSUMER, TYPE_ENVIRONMENTALIST, TYPE_OWNER] settings = "amount-{}__center-{}__tdistance-{}__fullness-{}__group-{}".format(amount, center, threshold_distance_from_centre, fullness, group_type) outdir += "/{}__{}".format(datetime.datetime.utcnow().strftime("%Y_%m_%d_T%H-%M-%S%z"), settings) # check the directory does not exist if not(os.path.exists(outdir)): # create the directory you want to save to os.mkdir(outdir) if not(os.path.exists("{}/data".format(outdir))): os.mkdir("{}/data".format(outdir)) if not(os.path.exists("{}/fig".format(outdir))): os.mkdir("{}/fig".format(outdir)) random.seed(10924892319) np.random.seed(seed=956109142) start_total = start = time.time() # Generating preferences and unfinished configurations generate_group_preferences(group_type_mappings, amount=amount) generate_unfinished_configurations(fullness=0.1, amount = amount) requests_random_member = list(zip(PREFERENCES_RANDOM_MEMBER, CONFIGURATIONS_UNFINISHED)) requests_all = list(zip(PREFERENCES_ALL, CONFIGURATIONS_UNFINISHED)) end = time.time() print("Done generating data! It took: {} seconds".format(end - start)) start = time.time() finished_configurations = list(map(lambda x: ConfigurationModel(x), TinyDB('./evaluation/eval.json').table('CONFIG').all())) random.shuffle(finished_configurations) end = time.time() print("Done loading finished configurations! It took: {} seconds".format(end - start)) scoring_function_list = get_scoring_functions() results_happiness_db_size_avg_diff = [] results_unhappiness_db_size_avg_diff = [] results_happiness_db_size_avg_total_all = [] results_unhappiness_db_size_avg_total_all = [] piece_counts = [16, 8, 4, 2, 1] scoring_function_labels = list(map(lambda x: x[0], scoring_function_list)) db_sizes_label = list(map(lambda x: len(finished_configurations) // x, piece_counts)) for label, scoring_function in scoring_function_list: print("!!! Starting evaluation of: {} !!!".format(label)) # Rate configurations start = time.time() np_scores_random = np.array(get_ratings(requests_random_member,finished_configurations,product_structure, scoring_function=scoring_function)) np_scores_all = np.array(get_ratings(requests_all,finished_configurations,product_structure, scoring_function=scoring_function)) end = time.time() print("Done rating stored configurations! It took: {} seconds".format(end - start)) happiness_db_size_avg_diff = [] unhappiness_db_size_avg_diff = [] happiness_db_size_avg_total_all = [] unhappiness_db_size_avg_total_all = [] happiness_db_size_stdd = [] unhappiness_db_size_stdd = [] for piece_count in piece_counts: happiness_diff_list = [] unhappiness_diff_list = [] happiness_all_list = [] unhappiness_all_list = [] step_size = len(finished_configurations) // piece_count residual = len(finished_configurations) % piece_count for run_count in range(piece_count): print("Starting run {} of {} with {} as store size.".format(run_count, (piece_count - 1) ,step_size)) offset_start = 0 offset_end = 0 if residual > 0: residual -= 1 offset_end = 1 start_pos = run_count * step_size + offset_start end_pos = (run_count + 1) * step_size + offset_start + offset_end offset_start += offset_end start = time.time() # Filtering data modifier_random = np.zeros(np_scores_random.shape) modifier_all = np.zeros(np_scores_all.shape) modifier_random[:,start_pos:end_pos] += 1 modifier_all[:,start_pos:end_pos] += 1 #np_scores_modified_random = np.multiply(np_scores_random[:], modifier_random) np_scores_modified_random = np_scores_random[:] np_scores_modified_all = np.multiply(np_scores_all[:], modifier_all) index_max_random = np.argmax(np_scores_modified_random, axis=1) index_max_all = np.argmax(np_scores_modified_all, axis=1) end = time.time() print("Done getting recommendations! It took: {} seconds".format(end - start)) # Generate individual scores start = time.time() scores_individual = [[[] for i in range(len(group_type_mappings))] for i in range(amount)] j = 0 for preference, configurationState in requests_all: individuals = preference.getIndividualPreferences() i = 0 for individual in individuals: scores_individual[j][i] = get_scores_for_one(configurationState, individual, finished_configurations, product_structure, scoring_function=scoring_function) i += 1 j += 1 end = time.time() print("Done generating individual scores! It took: {} seconds".format(end - start)) #Generate hapiness level start = time.time() avg_happy_diff = 0 avg_unhappy_diff = 0 avg_happy_all = 0 avg_unhappy_all = 0 individual_index = 0 for individuals_scores in scores_individual: unhappy_rand = 0 unhappy_all = 0 happy_rand = 0 happy_all = 0 for individual_score in individuals_scores: np_individual_score = np.array(individual_score) unhappy_threshold = np.percentile(np_individual_score, center - threshold_distance_from_centre) happy_threshold = np.percentile(np_individual_score, center + threshold_distance_from_centre) score_rand = np_individual_score[index_max_random[individual_index]] score_all = np_individual_score[index_max_all[individual_index]] if score_all > happy_threshold: happy_all += 1 elif score_all < unhappy_threshold: unhappy_all += 1 if score_rand > happy_threshold: happy_rand += 1 elif score_rand < unhappy_threshold: unhappy_rand += 1 avg_happy_diff += happy_all - happy_rand avg_unhappy_diff += unhappy_all - unhappy_rand avg_happy_all += happy_all avg_unhappy_all += unhappy_all individual_index += 1 avg_happy_diff /= amount avg_unhappy_diff /= amount avg_happy_all /= amount avg_unhappy_all /= amount happiness_diff_list.append(avg_happy_diff) unhappiness_diff_list.append(avg_unhappy_diff) happiness_all_list.append(avg_happy_all) unhappiness_all_list.append(avg_unhappy_all) print("-- Average increase in happiness: {} | Average increase in unhappiness: {}".format(avg_happy_diff, avg_unhappy_diff)) print("-- Average happiness: {} | Average unhappiness: {}".format(avg_happy_all, avg_unhappy_all)) end = time.time() print("Done rating recommendations! It took: {} seconds".format(end - start)) happiness_db_size_avg_diff.append(np.average(np.array(happiness_diff_list))) unhappiness_db_size_avg_diff.append(np.average(np.array(unhappiness_diff_list))) happiness_db_size_avg_total_all.append(np.average(np.array(happiness_all_list))) unhappiness_db_size_avg_total_all.append(np.average(np.array(unhappiness_all_list))) results_happiness_db_size_avg_diff.append(happiness_db_size_avg_diff) results_unhappiness_db_size_avg_diff.append(unhappiness_db_size_avg_diff) results_happiness_db_size_avg_total_all.append(happiness_db_size_avg_total_all) results_unhappiness_db_size_avg_total_all.append(unhappiness_db_size_avg_total_all) column_names = db_sizes_label row_names = scoring_function_labels pd.DataFrame(results_happiness_db_size_avg_diff, index=row_names, columns=column_names).to_csv("{}/data/_happy_increase.csv".format(outdir), index=True, header=True, sep=',') pd.DataFrame(results_unhappiness_db_size_avg_diff, index=row_names, columns=column_names).to_csv("{}/data/_unhappy_increase.csv".format(outdir).format(outdir), index=True, header=True, sep=',') pd.DataFrame(results_happiness_db_size_avg_total_all, index=row_names, columns=column_names).to_csv("{}/data/_happy_total_all.csv".format(outdir), index=True, header=True, sep=',') pd.DataFrame(results_unhappiness_db_size_avg_total_all, index=row_names, columns=column_names).to_csv("{}/data/_unhappy_total_all.csv".format(outdir).format(outdir), index=True, header=True, sep=',') end_total = time.time() print("Done! Total time: {} seconds".format(end_total - start_total)) axis=[0,150, -1, 0.5] pp.figure(figsize=(8,4), dpi=300) pp.subplots_adjust(hspace = 0.8, wspace=0.4) pp.subplot(1, 2, 1, title="happiness increase average", ) for result_happy in results_happiness_db_size_avg_diff: pp.plot(db_sizes_label, result_happy) pp.legend(scoring_function_labels) pp.xlabel("number of stored configurations") pp.ylabel("number of people") pp.axis(axis) pp.subplot(1, 2, 2, title="unhappiness increase average") for result_unhappy in results_unhappiness_db_size_avg_diff: pp.plot(db_sizes_label, result_unhappy) pp.legend(scoring_function_labels) pp.xlabel("number of stored configurations") pp.ylabel("number of people") pp.axis(axis) pp.savefig("{}/fig/happy_unhappy_increase.pdf".format(outdir),format="pdf") pp.figure(figsize=(8,4), dpi=300) axis=[0,150, 0, 4] pp.subplots_adjust(hspace = 0.8, wspace=0.4) pp.subplot(1, 2, 1, title="happiness absolute average", ) for result_happy in results_happiness_db_size_avg_total_all: pp.plot(db_sizes_label, result_happy) pp.legend(scoring_function_labels) pp.xlabel("number of stored configurations") pp.ylabel("number of people") pp.axis(axis) pp.subplot(1, 2, 2, title="unhappiness absolute average") for result_unhappy in results_unhappiness_db_size_avg_total_all: pp.plot(db_sizes_label, result_unhappy) pp.legend(scoring_function_labels) pp.xlabel("number of stored configurations") pp.ylabel("number of people") pp.axis(axis) pp.savefig("{}/fig/happy_unhappy_total_all.pdf".format(outdir),format="pdf") def main_tuple(param): print("----------------------------------------------------------------------------------------") print("----------------------Starting: {}----------------------".format(param)) print("----------------------------------------------------------------------------------------") main(amount=param[0], fullness=param[1],center=param[2] ,threshold_distance_from_centre = param[3], group_type= param[4]) return True if __name__ == "__main__": num_cores = multiprocessing.cpu_count() amounts = [1] fullnesses = [0.1] centers = [10, 20, 30, 40, 50, 60, 70, 80, 90] dists = [5] g_types = ["heterogeneous", "random", "homogenous"] params = list(itertools.product(amounts, fullnesses, centers, dists, g_types)) pool = multiprocessing.Pool(processes=num_cores) res = pool.map(main_tuple, params)