diff options
author | Alexandre Rames <alexandre.rames@linaro.org> | 2016-08-04 16:38:16 +0100 |
---|---|---|
committer | Alexandre Rames <alexandre.rames@linaro.org> | 2016-08-11 11:08:27 +0100 |
commit | fd2f5398d724e87917e244f81fca025b27ce69d6 (patch) | |
tree | 3ca01143468dada0be9dc7d2bc0b88e257d8b174 | |
parent | 9c53e75186c05783167ee955c9133b9f3445de5d (diff) | |
download | art-testing-fd2f5398d724e87917e244f81fca025b27ce69d6.tar.gz |
Filter warnings from scipy for linaro automation output.
This is a hack. Ideally we want to fix the root cause, by providing more
samples or better handling cases when we have few samples.
Change-Id: I3cfcc200aae036b35b71589969cd9e597dcff341
-rwxr-xr-x | compare.py | 26 | ||||
-rw-r--r-- | tools/utils_stats.py | 6 |
2 files changed, 23 insertions, 9 deletions
@@ -62,6 +62,7 @@ def BuildOptions(): def __init__(self, option_strings, **kwargs): super(LinaroAutomationAction, self).__init__(option_strings, **kwargs) def __call__(self, parser, namespace, values, option_string=None): + setattr(namespace, 'output_for_linaro_automation', True) setattr(namespace, 'significant_changes', True) setattr(namespace, 'order_by_diff', True) parser.add_argument('--output-for-linaro-automation', @@ -73,20 +74,24 @@ def BuildOptions(): # Filter out data entries that do not show any significant difference between # the two sets of results. -def FilterSignificantChanges(data_1, data_2, wilcoxon_p_threshold, ttest_p_threshold): +def FilterSignificantChanges(data_1, data_2, + wilcoxon_p_threshold, ttest_p_threshold, + filter_stats_warnings=False): if utils.IsDictionaryOrNone(data_1) and utils.IsDictionaryOrNone(data_2): keys = [k for k in data_1 if k in data_2] for k in keys: significant = FilterSignificantChanges( data_1[k], data_2[k], - wilcoxon_p_threshold, ttest_p_threshold) + wilcoxon_p_threshold, ttest_p_threshold, + filter_stats_warnings=filter_stats_warnings) if not significant: data_1.pop(k) data_2.pop(k) return True elif utils.IsListOrNone(data_1) and utils.IsListOrNone(data_2): - wilcoxon_p, ttest_p = utils_stats.ComputeStatsTests(data_1, data_2) + wilcoxon_p, ttest_p = utils_stats.ComputeStatsTests( + data_1, data_2, filter_warnings=filter_stats_warnings) return wilcoxon_p < wilcoxon_p_threshold or ttest_p < ttest_p_threshold else: @@ -104,7 +109,8 @@ def PrintDiff(data_1, data_2, key=None, indentation='', print_extended=0, - order_by_diff=False): + order_by_diff=False, + filter_stats_warnings=False): indentation_level = ' ' headers = ['', 'Wilcoxon P', 'T-test P', 'median diff (%)', 'mad1 (%)', 'mad2 (%)'] @@ -129,7 +135,8 @@ def PrintDiff(data_1, data_2, maybe_entry = PrintDiff(value_1, value_2, k, indentation + indentation_level, print_extended=print_extended, - order_by_diff=order_by_diff) + order_by_diff=order_by_diff, + filter_stats_warnings=filter_stats_warnings) if maybe_entry is not None: entries.append(maybe_entry) if entries: @@ -143,7 +150,8 @@ def PrintDiff(data_1, data_2, utils_stats.ComputeStats(data_1) if data_1 else no_results _, _, med2, _, madp2, ave2, _, dp2 = \ utils_stats.ComputeStats(data_2) if data_2 else no_results - wilcoxon_p, ttest_p = utils_stats.ComputeStatsTests(data_1, data_2) + wilcoxon_p, ttest_p = utils_stats.ComputeStatsTests( + data_1, data_2, filter_warnings=filter_stats_warnings) if data_1 and data_2: median_diff = utils_stats.GetRelativeDiff(med1, med2) mean_diff = utils_stats.GetRelativeDiff(ave1, ave2) @@ -175,11 +183,13 @@ if __name__ == "__main__": if args.significant_changes: FilterSignificantChanges(res_1, res_2, args.wilcoxon_p_threshold, - args.ttest_p_threshold) + args.ttest_p_threshold, + filter_stats_warnings=args.output_for_linaro_automation) PrintDiff(res_1, res_2, print_extended=args.print_extended, - order_by_diff=args.order_by_diff) + order_by_diff=args.order_by_diff, + filter_stats_warnings=args.output_for_linaro_automation) if utils.HaveSameKeys(res_1, res_2): utils_stats.ComputeAndPrintRelationGeomean( utils.Unflatten(res_1), diff --git a/tools/utils_stats.py b/tools/utils_stats.py index ac43012..43feb76 100644 --- a/tools/utils_stats.py +++ b/tools/utils_stats.py @@ -75,9 +75,11 @@ def ComputeStats(nums): dp = GetRatio(d, ave) return m, M, median, mad, madp, ave, d, dp -def ComputeStatsTests(list1, list2): +def ComputeStatsTests(list1, list2, filter_warnings=False): wilcoxon_p = float('NaN') ttest_p = float('NaN') + if filter_warnings: + warnings.simplefilter("ignore") if not list1 or not list2 or len(list1) < 10 or len(list2) < 10: warnings.warn("Number of samples too small to compute Wilcoxon test.") try: @@ -88,6 +90,8 @@ def ComputeStatsTests(list1, list2): ttest_p = scipy.stats.ttest_rel(list1, list2)[1] except: pass + if filter_warnings: + warnings.simplefilter("default") return wilcoxon_p, ttest_p def ComputeGeomeanHelper(data, res, current_key, compute_leaf_geomean): |