% NBER Working Paper 18391 (Diebold, September 2012).
% Conventions used in this entry:
%   - author is stored in "Last, First" form so name parsing is unambiguous;
%   - {Diebold-Mariano} is brace-protected in the title so sentence-casing
%     styles keep the proper-noun capitals;
%   - month uses the predefined, unquoted macro (sep) so styles can
%     abbreviate or localise it.
@techreport{NBERw18391,
  author      = {Diebold, Francis X.},
  title       = {Comparing Predictive Accuracy, Twenty Years Later: A Personal Perspective on the Use and Abuse of {Diebold-Mariano} Tests},
  institution = {National Bureau of Economic Research},
  type        = {Working Paper},
  series      = {Working Paper Series},
  number      = {18391},
  year        = {2012},
  month       = sep,
  url         = {http://www.nber.org/papers/w18391},
  abstract    = {The Diebold-Mariano (DM) test was intended for comparing forecasts; it has been, and remains, useful in that regard. The DM test was not intended for comparing models. Unfortunately, however, much of the large subsequent literature uses DM-type tests for comparing models, in (pseudo-) out-of-sample environments. In that case, much simpler yet more compelling full-sample model comparison procedures exist; they have been, and should continue to be, widely used. The hunch that (pseudo-) out-of-sample analysis is somehow the "only," or "best," or even a "good" way to provide insurance against in-sample over-fitting in model comparisons proves largely false. On the other hand, (pseudo-) out-of-sample analysis may be useful for learning about comparative historical predictive performance.},
}