# Mirror of https://github.com/13hannes11/bachelor_thesis_m.recommend.git
# Synced 2024-09-04 01:11:00 +02:00 -- 427 lines, 18 KiB, Python
import sys
|
|
import os
|
|
import datetime
|
|
import pandas as pd
|
|
import multiprocessing
|
|
import itertools
|
|
|
|
sys.path.append("./src")
|
|
sys.path.append("./evaluation")
|
|
from model.product_structure_model import ProductStructureModel
|
|
from model.preferences_model import Preferences
|
|
from model.configuration_model import ConfigurationModel
|
|
from managers.recommendation_manager import SimpleConfigurationMaxSelector
|
|
from scoring.scoring_functions import ReduceScoringFunctionFactory
|
|
from user_type_mappings import TYPE_ATHLETE, TYPE_CONSUMER, TYPE_ENVIRONMENTALIST, TYPE_OWNER, TYPE_RANDOM
|
|
import operator
|
|
import time
|
|
import numpy as np
|
|
import matplotlib.pyplot as pp
|
|
import random
|
|
import math
|
|
import json
|
|
# Load the product structure description once at import time; every function
# in this module reads the resulting ``data`` / ``product_structure`` globals.
# JSON text is UTF-8 by specification, so do not rely on the platform default
# encoding when reading it.
with open('./evaluation/product_structure.json', encoding='utf-8') as json_file:
    data = json.load(json_file)

product_structure = ProductStructureModel(data)
|
|
|
|
from tinydb import TinyDB
|
|
|
|
|
|
def DB():
    """Open the TinyDB database stored in ``eval.json``.

    A new ``TinyDB`` handle is created on every call.
    """
    database = TinyDB('eval.json')
    return database
|
|
|
|
def DB_CONFIG():
    """Return the ``CONFIG`` table of the database opened by ``DB()``."""
    database = DB()
    return database.table('CONFIG')
|
|
|
|
def DB_PRODUCT_STRUCTURE():
    """Return the ``PRODUCT_STRUCTURE`` table of the database opened by ``DB()``."""
    database = DB()
    return database.table('PRODUCT_STRUCTURE')
|
|
|
|
# Module-level result stores. The generate_* functions rebind them via
# ``global`` and main() reads them afterwards.

# Partially filled configurations, one per simulated request.
CONFIGURATIONS_UNFINISHED = []

# Preferences holding only one randomly picked member of each group.
PREFERENCES_RANDOM_MEMBER = []

# Preferences holding every member of each group.
PREFERENCES_ALL = []
|
|
|
|
def generate_group_preferences(user_type_mappings, amount = 1000):
    """Generate ``amount`` simulated groups and store their preferences.

    For every group, each entry of ``user_type_mappings`` yields one member:
    the mapping is indexed by characteristic ``elementId`` and the object found
    there is asked for a rating via ``generateNumber()``; the member is named
    after ``mapping['name']``.

    Two module-level lists are rebuilt as a side effect:

    * ``PREFERENCES_ALL`` - one ``Preferences`` per group with all members.
    * ``PREFERENCES_RANDOM_MEMBER`` - one ``Preferences`` per group holding
      only a single, randomly chosen member of that group.

    Returns:
        The freshly built ``PREFERENCES_ALL`` list.
    """
    global PREFERENCES_RANDOM_MEMBER, PREFERENCES_ALL

    characteristics = product_structure.get_list_of_characteristics()

    PREFERENCES_ALL = []
    PREFERENCES_RANDOM_MEMBER = []

    for _ in range(amount):
        # Draw the position of the lone representative *before* any rating is
        # generated so the sequence of random draws stays unchanged.
        chosen_position = random.randint(0, len(user_type_mappings) - 1)

        group_members = []
        chosen_members = []
        for position, mapping in enumerate(user_type_mappings):
            member = {
                "user": mapping['name'],
                "ratings": [
                    {
                        "code": characteristic.elementId,
                        "value": mapping[characteristic.elementId].generateNumber(),
                    }
                    for characteristic in characteristics
                ],
            }
            group_members.append(member)
            if position == chosen_position:
                chosen_members.append(member)

        PREFERENCES_ALL.append(Preferences({'preferences' : group_members}))
        PREFERENCES_RANDOM_MEMBER.append(Preferences({'preferences' : chosen_members}))

    return PREFERENCES_ALL
|
|
|
|
def generate_unfinished_configurations(fullness=0.3, amount=1000):
    """Derive ``amount`` partially filled configurations from stored ones.

    Each result is built by picking a random entry of the ``CONFIG`` table in
    ``./evaluation/eval.json``, keeping only those of its codes that are
    characteristics of the product structure, and sampling
    ``ceil(len(codes) * fullness)`` of them without replacement.

    The module-level ``CONFIGURATIONS_UNFINISHED`` list is rebuilt as a side
    effect.

    Args:
        fullness: Fraction of the characteristic codes kept per configuration.
        amount: Number of unfinished configurations to generate.

    Returns:
        The freshly built ``CONFIGURATIONS_UNFINISHED`` list.
    """
    global CONFIGURATIONS_UNFINISHED

    stored_configurations = TinyDB('./evaluation/eval.json').table('CONFIG').all()
    known_codes = [
        characteristic.elementId
        for characteristic in ProductStructureModel(data).get_list_of_characteristics()
    ]

    CONFIGURATIONS_UNFINISHED = []
    last_index = len(stored_configurations) - 1
    for _ in range(amount):
        template = stored_configurations[random.randint(0, last_index)]
        selectable_codes = [
            code for code in template['configuration'] if code in known_codes
        ]
        keep_count = math.ceil(len(selectable_codes) * fullness)
        partial_codes = random.sample(selectable_codes, keep_count)

        CONFIGURATIONS_UNFINISHED.append(ConfigurationModel({
            "configuration": partial_codes,
            "variables": [],
        }))

    return CONFIGURATIONS_UNFINISHED
|
|
|
|
def get_ratings(requests, finished_configurations, product_structure, scoring_function=None):
    """Score every finished configuration for every request.

    Args:
        requests: Iterable of ``(preference, config)`` pairs.
        finished_configurations: Candidate configurations to be rated.
        product_structure: Only used to build the default scoring function.
        scoring_function: Object exposing
            ``calc_score(config, preference, to_rate)``. When ``None``, a
            scoring function multiplying ``penalty_ratio`` with
            ``pref_product_simpleSelectedCharacterstics_average`` is built.

    Returns:
        A list with one inner list per request; the inner list holds the score
        of each finished configuration, in the order given.
    """
    # Identity check: ``== None`` would dispatch to a custom ``__eq__``.
    if scoring_function is None:
        scoring_function = ReduceScoringFunctionFactory.build_scoring_function(
            ["penalty_ratio", "pref_product_simpleSelectedCharacterstics_average"],
            product_structure,
            oper = operator.mul
        )

    return [
        [
            scoring_function.calc_score(config, preference, to_rate)
            for to_rate in finished_configurations
        ]
        for (preference, config) in requests
    ]
|
|
|
|
def plot_at_y(arr, val):
    """Plot the values of ``arr`` as ``'x'`` markers on the horizontal line y = ``val``."""
    heights = np.zeros_like(arr) + val
    pp.plot(arr, heights, 'x')
|
|
|
|
def get_scores_for_one(configurationState, preference, finished_configurations, product_structure, scoring_function=None):
    """Score every finished configuration for a single request.

    Args:
        configurationState: The (unfinished) configuration of the request.
        preference: The preference the scores are computed for.
        finished_configurations: Candidate configurations to be rated.
        product_structure: Only used to build the default scoring function.
        scoring_function: Object exposing
            ``calc_score(configurationState, preference, to_rate)``. When
            ``None``, a scoring function multiplying ``penalty_ratio`` with
            ``pref_product_simpleSelectedCharacterstics_average`` is built.

    Returns:
        List of scores, one per finished configuration, in the order given.
    """
    # Identity check: ``== None`` would dispatch to a custom ``__eq__``.
    if scoring_function is None:
        scoring_function = ReduceScoringFunctionFactory.build_scoring_function(
            ["penalty_ratio", "pref_product_simpleSelectedCharacterstics_average"],
            product_structure,
            oper = operator.mul
        )

    return [
        scoring_function.calc_score(configurationState, preference, to_rate)
        for to_rate in finished_configurations
    ]
|
|
|
|
def get_scoring_functions():
    """Build the labelled scoring functions compared by the evaluation.

    Every scoring function multiplies ``penalty_ratio`` with one preference
    score and is built against the module-level ``product_structure``.

    Returns:
        A list of ``(label, scoring_function)`` tuples in the order
        "multiplication", "least misery", "best average".
    """
    labelled_preference_scores = [
        ("multiplication", "pref_product_simpleSelectedCharacterstics_average"),
        ("least misery", "pref_min_simpleSelectedCharacterstics_average"),
        ("best average", "pref_average_simpleSelectedCharacterstics_average"),
    ]

    return [
        (
            label,
            ReduceScoringFunctionFactory.build_scoring_function(
                ["penalty_ratio", preference_score],
                product_structure,
                oper = operator.mul),
        )
        for label, preference_score in labelled_preference_scores
    ]
|
|
|
|
def _prepare_output_directories(outdir):
    """Create ``outdir`` and its ``data`` / ``fig`` sub-directories if missing."""
    for directory in (outdir, "{}/data".format(outdir), "{}/fig".format(outdir)):
        # makedirs also creates a missing parent (e.g. "./out") and tolerates
        # directories that already exist.
        os.makedirs(directory, exist_ok=True)


def _score_individuals(requests, finished_configurations, scoring_function, lower_percentile, upper_percentile):
    """Score all stored configurations for every single member of every group.

    Returns one list per request; each entry is a tuple
    ``(scores, unhappy_threshold, happy_threshold)`` for one group member,
    where the thresholds are the given percentiles of that member's scores.
    """
    scored_groups = []
    for preference, configuration_state in requests:
        members = []
        for individual in preference.getIndividualPreferences():
            scores = np.array(get_scores_for_one(
                configuration_state, individual, finished_configurations,
                product_structure, scoring_function=scoring_function))
            members.append((
                scores,
                np.percentile(scores, lower_percentile),
                np.percentile(scores, upper_percentile),
            ))
        scored_groups.append(members)
    return scored_groups


def _count_satisfaction(members, recommended_index):
    """Return ``(happy, unhappy)`` member counts for one recommended configuration."""
    happy = 0
    unhappy = 0
    for scores, unhappy_threshold, happy_threshold in members:
        score = scores[recommended_index]
        if score > happy_threshold:
            happy += 1
        elif score < unhappy_threshold:
            unhappy += 1
    return happy, unhappy


def _plot_result_pair(db_sizes, happy_results, unhappy_results, labels, kind, axis, path):
    """Plot happiness next to unhappiness over the store size and save as PDF."""
    figure = pp.figure(figsize=(8,4), dpi=300)
    pp.subplots_adjust(hspace = 0.8, wspace=0.4)

    panels = (
        ("happiness {} average".format(kind), happy_results),
        ("unhappiness {} average".format(kind), unhappy_results),
    )
    for position, (title, results) in enumerate(panels, start=1):
        pp.subplot(1, 2, position, title=title)
        for result in results:
            pp.plot(db_sizes, result)
        pp.legend(labels)
        pp.xlabel("number of stored configurations")
        pp.ylabel("number of people")
        pp.axis(axis)

    pp.savefig(path, format="pdf")
    # Release the figure; pool workers run several evaluations in a row.
    pp.close(figure)


def main(amount=1000, fullness=0.1, center=50, threshold_distance_from_centre = 0, group_type='heterogeneous', outdir="./out"):
    """Run one evaluation and write CSV tables and PDF figures.

    For each scoring function from ``get_scoring_functions()`` the stored
    configurations are split into 16, 8, 4, 2 and 1 consecutive pieces. For
    every piece the best-scoring configuration for the whole group is compared
    with the best-scoring configuration for one random group member, counting
    how many members end up "happy" or "unhappy" with each.

    Args:
        amount: Number of simulated groups / requests.
        fullness: Fraction of characteristic codes kept in each unfinished
            configuration (forwarded to ``generate_unfinished_configurations``).
        center: Percentile of a member's own scores around which the
            thresholds are placed.
        threshold_distance_from_centre: A member is happy when the recommended
            configuration scores above percentile ``center + distance`` and
            unhappy when it scores below percentile ``center - distance``.
        group_type: ``'homogenous'``, ``'random'``; anything else is treated
            as ``'heterogeneous'``.
        outdir: Parent directory; a timestamped sub-directory named after the
            settings is created inside it.
    """
    print("Started Evaluation")

    if group_type == 'homogenous':
        group_type_mappings = [TYPE_OWNER] * 4
    elif group_type == 'random':
        group_type_mappings = [TYPE_RANDOM] * 4
    else:
        group_type = 'heterogeneous'
        group_type_mappings = [TYPE_ATHLETE, TYPE_CONSUMER, TYPE_ENVIRONMENTALIST, TYPE_OWNER]

    settings = "amount-{}__center-{}__tdistance-{}__fullness-{}__group-{}".format(amount, center, threshold_distance_from_centre, fullness, group_type)
    outdir += "/{}__{}".format(datetime.datetime.utcnow().strftime("%Y_%m_%d_T%H-%M-%S%z"), settings)

    _prepare_output_directories(outdir)

    # Fixed seeds keep the generated data reproducible between runs.
    random.seed(10924892319)
    np.random.seed(seed=956109142)

    start_total = start = time.time()

    # Generating preferences and unfinished configurations. Both calls fill
    # the module-level lists read below.
    generate_group_preferences(group_type_mappings, amount=amount)
    # Honour the caller's ``fullness`` (it used to be hard-coded to 0.1 here
    # while still being reported in the output directory name).
    generate_unfinished_configurations(fullness=fullness, amount=amount)

    requests_random_member = list(zip(PREFERENCES_RANDOM_MEMBER, CONFIGURATIONS_UNFINISHED))
    requests_all = list(zip(PREFERENCES_ALL, CONFIGURATIONS_UNFINISHED))
    end = time.time()
    print("Done generating data! It took: {} seconds".format(end - start))

    start = time.time()
    finished_configurations = [
        ConfigurationModel(entry)
        for entry in TinyDB('./evaluation/eval.json').table('CONFIG').all()
    ]
    random.shuffle(finished_configurations)
    end = time.time()
    print("Done loading finished configurations! It took: {} seconds".format(end - start))

    scoring_function_list = get_scoring_functions()

    results_happiness_db_size_avg_diff = []
    results_unhappiness_db_size_avg_diff = []
    results_happiness_db_size_avg_total_all = []
    results_unhappiness_db_size_avg_total_all = []

    piece_counts = [16, 8, 4, 2, 1]
    store_size = len(finished_configurations)
    scoring_function_labels = [label for label, _ in scoring_function_list]
    db_sizes_label = [store_size // piece_count for piece_count in piece_counts]

    lower_percentile = center - threshold_distance_from_centre
    upper_percentile = center + threshold_distance_from_centre

    for label, scoring_function in scoring_function_list:
        print("!!! Starting evaluation of: {} !!!".format(label))

        # Rate configurations
        start = time.time()
        np_scores_random = np.array(get_ratings(requests_random_member, finished_configurations, product_structure, scoring_function=scoring_function))
        np_scores_all = np.array(get_ratings(requests_all, finished_configurations, product_structure, scoring_function=scoring_function))
        end = time.time()
        print("Done rating stored configurations! It took: {} seconds".format(end - start))

        # Generate individual scores. They depend only on the scoring
        # function, not on the store slice, so compute them once here instead
        # of once per run.
        start = time.time()
        scored_groups = _score_individuals(
            requests_all, finished_configurations, scoring_function,
            lower_percentile, upper_percentile)
        end = time.time()
        print("Done generating individual scores! It took: {} seconds".format(end - start))

        # NOTE(review): the random-member recommendation is taken over the
        # whole store rather than the current slice (the slice mask was
        # commented out in the original) - confirm this is intended.
        index_max_random = np.argmax(np_scores_random, axis=1)

        happiness_db_size_avg_diff = []
        unhappiness_db_size_avg_diff = []
        happiness_db_size_avg_total_all = []
        unhappiness_db_size_avg_total_all = []

        for piece_count in piece_counts:
            happiness_diff_list = []
            unhappiness_diff_list = []
            happiness_all_list = []
            unhappiness_all_list = []

            step_size, residual = divmod(store_size, piece_count)
            # Accumulated shift caused by earlier pieces that took one of the
            # residual elements. It must persist across runs; resetting it per
            # run made neighbouring pieces overlap and dropped the tail.
            offset_start = 0

            for run_count in range(piece_count):
                print("Starting run {} of {} with {} as store size.".format(run_count, (piece_count - 1), step_size))
                offset_end = 0
                if residual > 0:
                    residual -= 1
                    offset_end = 1

                start_pos = run_count * step_size + offset_start
                end_pos = start_pos + step_size + offset_end
                offset_start += offset_end

                start = time.time()

                # Filtering data: zero out every score outside the current
                # slice so argmax picks a configuration from within it.
                modifier_all = np.zeros(np_scores_all.shape)
                modifier_all[:, start_pos:end_pos] += 1
                np_scores_modified_all = np.multiply(np_scores_all, modifier_all)
                index_max_all = np.argmax(np_scores_modified_all, axis=1)

                end = time.time()
                print("Done getting recommendations! It took: {} seconds".format(end - start))

                # Generate happiness level
                start = time.time()
                avg_happy_diff = 0
                avg_unhappy_diff = 0
                avg_happy_all = 0
                avg_unhappy_all = 0

                for group_index, members in enumerate(scored_groups):
                    happy_all, unhappy_all = _count_satisfaction(members, index_max_all[group_index])
                    happy_rand, unhappy_rand = _count_satisfaction(members, index_max_random[group_index])

                    avg_happy_diff += happy_all - happy_rand
                    avg_unhappy_diff += unhappy_all - unhappy_rand
                    avg_happy_all += happy_all
                    avg_unhappy_all += unhappy_all

                avg_happy_diff /= amount
                avg_unhappy_diff /= amount
                avg_happy_all /= amount
                avg_unhappy_all /= amount

                happiness_diff_list.append(avg_happy_diff)
                unhappiness_diff_list.append(avg_unhappy_diff)
                happiness_all_list.append(avg_happy_all)
                unhappiness_all_list.append(avg_unhappy_all)

                print("-- Average increase in happiness: {} | Average increase in unhappiness: {}".format(avg_happy_diff, avg_unhappy_diff))
                print("-- Average happiness: {} | Average unhappiness: {}".format(avg_happy_all, avg_unhappy_all))
                end = time.time()
                print("Done rating recommendations! It took: {} seconds".format(end - start))

            happiness_db_size_avg_diff.append(np.average(np.array(happiness_diff_list)))
            unhappiness_db_size_avg_diff.append(np.average(np.array(unhappiness_diff_list)))
            happiness_db_size_avg_total_all.append(np.average(np.array(happiness_all_list)))
            unhappiness_db_size_avg_total_all.append(np.average(np.array(unhappiness_all_list)))

        results_happiness_db_size_avg_diff.append(happiness_db_size_avg_diff)
        results_unhappiness_db_size_avg_diff.append(unhappiness_db_size_avg_diff)
        results_happiness_db_size_avg_total_all.append(happiness_db_size_avg_total_all)
        results_unhappiness_db_size_avg_total_all.append(unhappiness_db_size_avg_total_all)

    # One row per scoring function, one column per store size.
    column_names = db_sizes_label
    row_names = scoring_function_labels
    csv_outputs = (
        (results_happiness_db_size_avg_diff, "{}/data/_happy_increase.csv"),
        (results_unhappiness_db_size_avg_diff, "{}/data/_unhappy_increase.csv"),
        (results_happiness_db_size_avg_total_all, "{}/data/_happy_total_all.csv"),
        (results_unhappiness_db_size_avg_total_all, "{}/data/_unhappy_total_all.csv"),
    )
    for results, path_template in csv_outputs:
        pd.DataFrame(results, index=row_names, columns=column_names).to_csv(path_template.format(outdir), index=True, header=True, sep=',')

    end_total = time.time()
    print("Done! Total time: {} seconds".format(end_total - start_total))

    _plot_result_pair(
        db_sizes_label,
        results_happiness_db_size_avg_diff,
        results_unhappiness_db_size_avg_diff,
        scoring_function_labels,
        "increase",
        [0,150, -1, 0.5],
        "{}/fig/happy_unhappy_increase.pdf".format(outdir),
    )
    _plot_result_pair(
        db_sizes_label,
        results_happiness_db_size_avg_total_all,
        results_unhappiness_db_size_avg_total_all,
        scoring_function_labels,
        "absolute",
        [0,150, 0, 4],
        "{}/fig/happy_unhappy_total_all.pdf".format(outdir),
    )
|
|
|
|
def main_tuple(param):
    """Run ``main`` with settings packed into one indexable ``param``.

    ``param`` is read positionally as ``(amount, fullness, center,
    threshold_distance_from_centre, group_type)``. Taking a single argument
    lets the function be used with ``Pool.map``.

    Returns:
        ``True`` once ``main`` has returned.
    """
    separator = "----------------------------------------------------------------------------------------"
    print(separator)
    print("----------------------Starting: {}----------------------".format(param))
    print(separator)

    main(
        amount=param[0],
        fullness=param[1],
        center=param[2],
        threshold_distance_from_centre=param[3],
        group_type=param[4],
    )
    return True
|
|
|
|
if __name__ == "__main__":
    # Evaluate every combination of the settings below, one worker process
    # per CPU core.
    num_cores = multiprocessing.cpu_count()

    amounts = [1]
    fullnesses = [0.1]
    centers = [10, 20, 30, 40, 50, 60, 70, 80, 90]
    dists = [5]
    g_types = ["heterogeneous", "random", "homogenous"]

    params = list(itertools.product(amounts, fullnesses, centers, dists, g_types))

    # ``map`` blocks until every evaluation has finished; the context manager
    # then shuts the worker processes down instead of leaving the pool open.
    with multiprocessing.Pool(processes=num_cores) as pool:
        res = pool.map(main_tuple, params)
|
|
|