add reference for cross validation

2024-09-04 01:11:00 +02:00 · 2020-05-09 14:29:39 +02:00
parent f93a94d3a3
commit c41ee2b7f1
2 changed files with 11 additions and 1 deletions
@@ -180,7 +180,7 @@ The natural group type for the use case is a heterogeneous group but to widen th
 \subsection{The Effect of Stored Finished Configurations}
-Another important component of the evaluation is the influence of stored finished configurations. When evaluating a subset of stored finished configurations it is important to avoid outliers. This is the reason why a process inspired by \emph{cross validation} \todo{referenz hinzufügen} is used. The configuration database is randomly ordered and sliced into sub-databases of the needed size. As an example, if the evaluated stored data size is 20, a configuration database containing 100 configurations is split into five sub-databases of size 20. Now the evaluation is carried out for each of the sub-databases and finally the average is determined. This avoids the random picking of a subset which either performs much better than most other possible combinations of databases or which performs much worse. This way the data is more aligned to the expected value.
+Another important component of the evaluation is the influence of stored finished configurations. When evaluating a subset of stored finished configurations it is important to avoid outliers. This is the reason why a process inspired by \emph{cross validation} \cite{kohaviStudyCrossValidationBootstrap1995} is used. The configuration database is randomly ordered and sliced into sub-databases of the needed size. As an example, if the evaluated stored data size is 20, a configuration database containing 100 configurations is split into five sub-databases of size 20. Now the evaluation is carried out for each of the sub-databases and finally the average is determined. This avoids the random picking of a subset which either performs much better than most other possible combinations of databases or which performs much worse. This way the data is more aligned to the expected value.
 \section{Hypotheses}
 \label{sec:Evaluation:Hypotheses}
@@ -1005,6 +1005,16 @@ procedure.},
  langid = {english}
 }
@inproceedings{kohaviStudyCrossValidationBootstrap1995,
  title = {A {{Study}} of {{Cross}}-{{Validation}} and {{Bootstrap}} for {{Accuracy Estimation}} and {{Model Selection}}},
  author = {Kohavi, Ron},
  date = {1995},
  pages = {1137--1143},
  publisher = {{Morgan Kaufmann}},
  abstract = {We review accuracy estimation methods and compare the two most common methods: crossvalidation and bootstrap. Recent experimental results on artificial data and theoretical results in restricted settings have shown that for selecting a good classifier from a set of classifiers (model selection), ten-fold cross-validation may be better than the more expensive leaveone -out cross-validation. We report on a largescale experiment---over half a million runs of C4.5 and a Naive-Bayes algorithm---to estimate the effects of different parameters on these algorithms on real-world datasets. For crossvalidation, we vary the number of folds and whether the folds are stratified or not; for bootstrap, we vary the number of bootstrap samples. Our results indicate that for real-word datasets similar to ours, the best method to use for model selection is ten-fold stratified cross validation, even if computation power allows using more folds. 1 Introduction It can not be emphasized enough that no claim ...},
  file = {C\:\\Users\\Hannes.Kuchelmeister\\Zotero\\storage\\GGH5NYBZ\\Kohavi_1995_A Study of Cross-Validation and Bootstrap for Accuracy Estimation and Model.pdf;C\:\\Users\\Hannes.Kuchelmeister\\Zotero\\storage\\M7BT7CCG\\summary.html}
 }
@online{kuchelmeister13hannes11BachelorThesis,
  title = {13hannes11/Bachelor\_thesis\_m.Recommend},
  author = {Kuchelmeister, Hannes F.},