add reference for cross validation

This commit is contained in:
hannes.kuchelmeister
2020-05-09 14:29:39 +02:00
parent f93a94d3a3
commit c41ee2b7f1
2 changed files with 11 additions and 1 deletions

View File

@@ -180,7 +180,7 @@ The natural group type for the use case is a heterogeneous group but to widen th
\subsection{The Effect of Stored Finished Configurations} \subsection{The Effect of Stored Finished Configurations}
Another important component of the evaluation is the influence of stored finished configurations. When evaluating a subset of stored finished configurations, it is important to avoid outliers. For this reason, a process inspired by \emph{cross validation} \todo{referenz hinzufügen} is used. The configuration database is randomly ordered and sliced into sub-databases of the needed size. As an example, if the evaluated stored data size is 20, a configuration database containing 100 configurations is split into five sub-databases of size 20. The evaluation is then carried out for each of the sub-databases, and finally the average is determined. This avoids randomly picking a subset that performs much better or much worse than most other possible sub-databases. This way, the data is more closely aligned with the expected value. Another important component of the evaluation is the influence of stored finished configurations. When evaluating a subset of stored finished configurations, it is important to avoid outliers. For this reason, a process inspired by \emph{cross validation} \cite{kohaviStudyCrossValidationBootstrap1995} is used. The configuration database is randomly ordered and sliced into sub-databases of the needed size. As an example, if the evaluated stored data size is 20, a configuration database containing 100 configurations is split into five sub-databases of size 20. The evaluation is then carried out for each of the sub-databases, and finally the average is determined. This avoids randomly picking a subset that performs much better or much worse than most other possible sub-databases. This way, the data is more closely aligned with the expected value.
\section{Hypotheses} \section{Hypotheses}
\label{sec:Evaluation:Hypotheses} \label{sec:Evaluation:Hypotheses}

View File

@@ -1005,6 +1005,16 @@ procedure.},
langid = {english} langid = {english}
} }
@inproceedings{kohaviStudyCrossValidationBootstrap1995,
  title = {A {{Study}} of {{Cross}}-{{Validation}} and {{Bootstrap}} for {{Accuracy Estimation}} and {{Model Selection}}},
  booktitle = {Proceedings of the 14th {{International Joint Conference}} on {{Artificial Intelligence}}},
  author = {Kohavi, Ron},
  date = {1995},
  volume = {2},
  pages = {1137--1143},
  publisher = {{Morgan Kaufmann}},
  abstract = {We review accuracy estimation methods and compare the two most common methods: cross-validation and bootstrap. Recent experimental results on artificial data and theoretical results in restricted settings have shown that for selecting a good classifier from a set of classifiers (model selection), ten-fold cross-validation may be better than the more expensive leave-one-out cross-validation. We report on a large-scale experiment---over half a million runs of C4.5 and a Naive-Bayes algorithm---to estimate the effects of different parameters on these algorithms on real-world datasets. For cross-validation, we vary the number of folds and whether the folds are stratified or not; for bootstrap, we vary the number of bootstrap samples. Our results indicate that for real-word datasets similar to ours, the best method to use for model selection is ten-fold stratified cross validation, even if computation power allows using more folds.},
  langid = {english},
  internal-note = {booktitle and volume added by hand (required for @inproceedings); abstract cleaned of extraction artifacts and of spilled body text from the paper's introduction},
  file = {C\:\\Users\\Hannes.Kuchelmeister\\Zotero\\storage\\GGH5NYBZ\\Kohavi_1995_A Study of Cross-Validation and Bootstrap for Accuracy Estimation and Model.pdf;C\:\\Users\\Hannes.Kuchelmeister\\Zotero\\storage\\M7BT7CCG\\summary.html}
}
@online{kuchelmeister13hannes11BachelorThesis, @online{kuchelmeister13hannes11BachelorThesis,
title = {13hannes11/Bachelor\_thesis\_m.Recommend}, title = {13hannes11/Bachelor\_thesis\_m.Recommend},
author = {Kuchelmeister, Hannes F.}, author = {Kuchelmeister, Hannes F.},