Filter warnings from scipy for linaro automation output.

This is a hack. Ideally we would fix the root cause, either by providing more samples or by better handling the cases where we have few samples.

Change-Id: I3cfcc200aae036b35b71589969cd9e597dcff341
author: Alexandre Rames <alexandre.rames@linaro.org> 2016-08-04 16:38:16 +0100
committer: Alexandre Rames <alexandre.rames@linaro.org> 2016-08-11 11:08:27 +0100
commit: fd2f5398d724e87917e244f81fca025b27ce69d6 (patch)
tree: 3ca01143468dada0be9dc7d2bc0b88e257d8b174
parent: 9c53e75186c05783167ee955c9133b9f3445de5d (diff)
download: art-testing-fd2f5398d724e87917e244f81fca025b27ce69d6.tar.gz
2 files changed, 23 insertions, 9 deletions
diff --git a/compare.py b/compare.py
index 72af151..3c7fbcd 100755
--- a/compare.py
+++ b/compare.py
@@ -62,6 +62,7 @@ def BuildOptions():
         def __init__(self, option_strings, **kwargs):
             super(LinaroAutomationAction, self).__init__(option_strings, **kwargs)
         def __call__(self, parser, namespace, values, option_string=None):
+            setattr(namespace, 'output_for_linaro_automation', True)
             setattr(namespace, 'significant_changes', True)
             setattr(namespace, 'order_by_diff', True)
     parser.add_argument('--output-for-linaro-automation',
@@ -73,20 +74,24 @@ def BuildOptions():
 
 # Filter out data entries that do not show any significant difference between
 # the two sets of results.
-def FilterSignificantChanges(data_1, data_2, wilcoxon_p_threshold, ttest_p_threshold):
+def FilterSignificantChanges(data_1, data_2,
+                             wilcoxon_p_threshold, ttest_p_threshold,
+                             filter_stats_warnings=False):
     if utils.IsDictionaryOrNone(data_1) and utils.IsDictionaryOrNone(data_2):
         keys = [k for k in data_1 if k in data_2]
         for k in keys:
             significant = FilterSignificantChanges(
                     data_1[k], data_2[k],
-                    wilcoxon_p_threshold, ttest_p_threshold)
+                    wilcoxon_p_threshold, ttest_p_threshold,
+                    filter_stats_warnings=filter_stats_warnings)
             if not significant:
                 data_1.pop(k)
                 data_2.pop(k)
         return True
 
     elif utils.IsListOrNone(data_1) and utils.IsListOrNone(data_2):
-        wilcoxon_p, ttest_p = utils_stats.ComputeStatsTests(data_1, data_2)
+        wilcoxon_p, ttest_p = utils_stats.ComputeStatsTests(
+            data_1, data_2, filter_warnings=filter_stats_warnings)
         return wilcoxon_p < wilcoxon_p_threshold or ttest_p < ttest_p_threshold
 
     else:
@@ -104,7 +109,8 @@ def PrintDiff(data_1, data_2,
               key=None,
               indentation='',
               print_extended=0,
-              order_by_diff=False):
+              order_by_diff=False,
+              filter_stats_warnings=False):
     indentation_level = '    '
     headers = ['', 'Wilcoxon P', 'T-test P',
                'median diff (%)', 'mad1 (%)', 'mad2 (%)']
@@ -129,7 +135,8 @@ def PrintDiff(data_1, data_2,
             maybe_entry = PrintDiff(value_1, value_2, k,
                                     indentation + indentation_level,
                                     print_extended=print_extended,
-                                    order_by_diff=order_by_diff)
+                                    order_by_diff=order_by_diff,
+                                    filter_stats_warnings=filter_stats_warnings)
             if maybe_entry is not None:
                 entries.append(maybe_entry)
         if entries:
@@ -143,7 +150,8 @@ def PrintDiff(data_1, data_2,
                 utils_stats.ComputeStats(data_1) if data_1 else no_results
         _, _, med2, _, madp2, ave2, _, dp2 = \
                 utils_stats.ComputeStats(data_2) if data_2 else no_results
-        wilcoxon_p, ttest_p = utils_stats.ComputeStatsTests(data_1, data_2)
+        wilcoxon_p, ttest_p = utils_stats.ComputeStatsTests(
+            data_1, data_2, filter_warnings=filter_stats_warnings)
         if data_1 and data_2:
             median_diff = utils_stats.GetRelativeDiff(med1, med2)
             mean_diff = utils_stats.GetRelativeDiff(ave1, ave2)
@@ -175,11 +183,13 @@ if __name__ == "__main__":
     if args.significant_changes:
         FilterSignificantChanges(res_1, res_2,
                                  args.wilcoxon_p_threshold,
-                                 args.ttest_p_threshold)
+                                 args.ttest_p_threshold,
+                                 filter_stats_warnings=args.output_for_linaro_automation)
 
     PrintDiff(res_1, res_2,
               print_extended=args.print_extended,
-              order_by_diff=args.order_by_diff)
+              order_by_diff=args.order_by_diff,
+              filter_stats_warnings=args.output_for_linaro_automation)
     if utils.HaveSameKeys(res_1, res_2):
         utils_stats.ComputeAndPrintRelationGeomean(
             utils.Unflatten(res_1),
diff --git a/tools/utils_stats.py b/tools/utils_stats.py
index ac43012..43feb76 100644
--- a/tools/utils_stats.py
+++ b/tools/utils_stats.py
@@ -75,9 +75,11 @@ def ComputeStats(nums):
         dp = GetRatio(d, ave)
         return m, M, median, mad, madp, ave, d, dp
 
-def ComputeStatsTests(list1, list2):
+def ComputeStatsTests(list1, list2, filter_warnings=False):
     wilcoxon_p = float('NaN')
     ttest_p = float('NaN')
+    if filter_warnings:
+        warnings.simplefilter("ignore")
     if not list1 or not list2 or len(list1) < 10 or len(list2) < 10:
         warnings.warn("Number of samples too small to compute Wilcoxon test.")
     try:
@@ -88,6 +90,8 @@ def ComputeStatsTests(list1, list2):
         ttest_p = scipy.stats.ttest_rel(list1, list2)[1]
     except:
         pass
+    if filter_warnings:
+        warnings.simplefilter("default")
     return wilcoxon_p, ttest_p
 
 def ComputeGeomeanHelper(data, res, current_key, compute_leaf_geomean):
author	Alexandre Rames <alexandre.rames@linaro.org>	2016-08-04 16:38:16 +0100
committer	Alexandre Rames <alexandre.rames@linaro.org>	2016-08-11 11:08:27 +0100
commit	fd2f5398d724e87917e244f81fca025b27ce69d6 (patch)
tree	3ca01143468dada0be9dc7d2bc0b88e257d8b174
parent	9c53e75186c05783167ee955c9133b9f3445de5d (diff)
download	art-testing-fd2f5398d724e87917e244f81fca025b27ce69d6.tar.gz