1/* Branch prediction routines for the GNU compiler.
2   Copyright (C) 2000-2014 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8Software Foundation; either version 3, or (at your option) any later
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3.  If not see
18<http://www.gnu.org/licenses/>.  */
19
20/* References:
21
22   [1] "Branch Prediction for Free"
23       Ball and Larus; PLDI '93.
24   [2] "Static Branch Frequency and Program Profile Analysis"
25       Wu and Larus; MICRO-27.
26   [3] "Corpus-based Static Branch Prediction"
27       Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
28
29
30#include "config.h"
31#include "system.h"
32#include "coretypes.h"
33#include "tm.h"
34#include "tree.h"
35#include "calls.h"
36#include "rtl.h"
37#include "tm_p.h"
38#include "hard-reg-set.h"
39#include "basic-block.h"
40#include "insn-config.h"
41#include "regs.h"
42#include "flags.h"
43#include "function.h"
44#include "except.h"
45#include "diagnostic-core.h"
46#include "recog.h"
47#include "expr.h"
48#include "predict.h"
49#include "coverage.h"
50#include "sreal.h"
51#include "params.h"
52#include "target.h"
53#include "cfgloop.h"
54#include "pointer-set.h"
55#include "tree-ssa-alias.h"
56#include "internal-fn.h"
57#include "gimple-expr.h"
58#include "is-a.h"
59#include "gimple.h"
60#include "gimple-iterator.h"
61#include "gimple-ssa.h"
62#include "cgraph.h"
63#include "tree-cfg.h"
64#include "tree-phinodes.h"
65#include "ssa-iterators.h"
66#include "tree-ssa-loop-niter.h"
67#include "tree-ssa-loop.h"
68#include "tree-pass.h"
69#include "tree-scalar-evolution.h"
70#include "cfgloop.h"
71
72/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
73		   1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
74static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
75	     real_inv_br_prob_base, real_one_half, real_bb_freq_max;
76
77static void combine_predictions_for_insn (rtx, basic_block);
78static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
79static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
80static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
81static bool can_predict_insn_p (const_rtx);
82
83/* Information we hold about each branch predictor.
84   Filled using information from predict.def.  */
85
86struct predictor_info
87{
88  const char *const name;	/* Name used in the debugging dumps.  */
89  const int hitrate;		/* Expected hitrate used by
90				   predict_insn_def call.  */
91  const int flags;
92};
93
/* Use the given predictor without Dempster-Shafer theory if it matches
   using the first_match heuristics.  */
96#define PRED_FLAG_FIRST_MATCH 1
97
/* Convert a hitrate given in percent to our internal representation.  */
99
100#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
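/* For example, with the usual REG_BR_PROB_BASE value of 10000,
   HITRATE (90) evaluates to 9000, i.e. a 90% predicted hitrate
   (illustrative note).  */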
101
102#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
103static const struct predictor_info predictor_info[]= {
104#include "predict.def"
105
106  /* Upper bound on predictors.  */
107  {NULL, 0, 0}
108};
109#undef DEF_PREDICTOR
110
111/* Return TRUE if frequency FREQ is considered to be hot.  */
112
113static inline bool
114maybe_hot_frequency_p (struct function *fun, int freq)
115{
116  struct cgraph_node *node = cgraph_node::get (fun->decl);
117  if (!profile_info || !flag_branch_probabilities)
118    {
119      if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
120        return false;
121      if (node->frequency == NODE_FREQUENCY_HOT)
122        return true;
123    }
124  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
125    return true;
126  if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
127      && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
128    return false;
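  /* A block is considered hot when its frequency is at least a fraction
     (1 / hot-bb-frequency-fraction, by default 1/4; the default value is
     an assumption here, see params.def) of the entry block's frequency.  */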
129  if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
130    return false;
131  if (freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency
132	      / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
133    return false;
134  return true;
135}
136
137static gcov_type min_count = -1;
138
139/* Determine the threshold for hot BB counts.  */
140
141gcov_type
142get_hot_bb_threshold ()
143{
144  gcov_working_set_t *ws;
145  if (min_count == -1)
146    {
147      ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
148      gcc_assert (ws);
149      min_count = ws->min_counter;
150    }
151  return min_count;
152}
153
154/* Set the threshold for hot BB counts.  */
155
156void
157set_hot_bb_threshold (gcov_type min)
158{
159  min_count = min;
160}
161
/* Return TRUE if profile count COUNT is considered to be hot.  */
163
164static inline bool
165maybe_hot_count_p (struct function *fun, gcov_type count)
166{
167  if (fun && profile_status_for_fn (fun) != PROFILE_READ)
168    return true;
  /* Code executed at most once (on average per profiling run) is not hot.  */
170  if (profile_info->runs >= count)
171    return false;
172  return (count >= get_hot_bb_threshold ());
173}
174
175/* Return true in case BB can be CPU intensive and should be optimized
176   for maximal performance.  */
177
178bool
179maybe_hot_bb_p (struct function *fun, const_basic_block bb)
180{
181  gcc_checking_assert (fun);
182  if (profile_status_for_fn (fun) == PROFILE_READ)
183    return maybe_hot_count_p (fun, bb->count);
184  return maybe_hot_frequency_p (fun, bb->frequency);
185}
186
187/* Return true if the call can be hot.  */
188
189bool
190cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
191{
192  if (profile_info && flag_branch_probabilities
193      && !maybe_hot_count_p (NULL,
194                             edge->count))
195    return false;
196  if (edge->caller->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED
197      || (edge->callee
198	  && edge->callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
199    return false;
200  if (edge->caller->frequency > NODE_FREQUENCY_UNLIKELY_EXECUTED
201      && (edge->callee
202	  && edge->callee->frequency <= NODE_FREQUENCY_EXECUTED_ONCE))
203    return false;
204  if (optimize_size)
205    return false;
206  if (edge->caller->frequency == NODE_FREQUENCY_HOT)
207    return true;
208  if (edge->caller->frequency == NODE_FREQUENCY_EXECUTED_ONCE
209      && edge->frequency < CGRAPH_FREQ_BASE * 3 / 2)
210    return false;
211  if (flag_guess_branch_prob)
212    {
213      if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0
214	  || edge->frequency <= (CGRAPH_FREQ_BASE
215				 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
216        return false;
217    }
218  return true;
219}
220
/* Return true in case edge E can be CPU intensive and should be optimized
   for maximal performance.  */
223
224bool
225maybe_hot_edge_p (edge e)
226{
227  if (profile_status_for_fn (cfun) == PROFILE_READ)
228    return maybe_hot_count_p (cfun, e->count);
229  return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
230}
231
232
233
/* Return true if profile COUNT and FREQUENCY, or function FUN's static
   node frequency, reflect never being executed.  */
236
237static bool
238probably_never_executed (struct function *fun,
239                         gcov_type count, int frequency)
240{
241  gcc_checking_assert (fun);
242  if (profile_status_for_fn (cfun) == PROFILE_READ)
243    {
244      int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
245      if (count * unlikely_count_fraction >= profile_info->runs)
246	return false;
247      if (!frequency)
248	return true;
249      if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency)
250	return false;
251      if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
252	{
253          gcov_type computed_count;
          /* Check for the possibility of overflow: if the entry bb count is
             large, the multiplication below could overflow, but the count is
             then also large enough to do the division first without losing
             much precision.  */
257	  if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count < REG_BR_PROB_BASE *
258	      REG_BR_PROB_BASE)
259            {
260              gcov_type scaled_count
261		  = frequency * ENTRY_BLOCK_PTR_FOR_FN (cfun)->count *
262	     unlikely_count_fraction;
263	      computed_count = RDIV (scaled_count,
264				     ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency);
265            }
266          else
267            {
268	      computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count,
269				     ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency);
270              computed_count *= frequency * unlikely_count_fraction;
271            }
272          if (computed_count >= profile_info->runs)
273            return false;
274	}
275      return true;
276    }
277  if ((!profile_info || !flag_branch_probabilities)
278      && (cgraph_node::get (fun->decl)->frequency
279	  == NODE_FREQUENCY_UNLIKELY_EXECUTED))
280    return true;
281  return false;
282}
283
284
285/* Return true in case BB is probably never executed.  */
286
287bool
288probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
289{
290  return probably_never_executed (fun, bb->count, bb->frequency);
291}
292
293
294/* Return true in case edge E is probably never executed.  */
295
296bool
297probably_never_executed_edge_p (struct function *fun, edge e)
298{
299  return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
300}
301
302/* Return true if function should be optimized for size.  */
303
304bool
305cgraph_node::optimize_for_size_p (void)
306{
307  if (optimize_size)
308    return true;
309  if (frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
310    return true;
311  else
312    return false;
313}
314
/* Return true when the function FUN should always be optimized for size.  */
316
317bool
318optimize_function_for_size_p (struct function *fun)
319{
320  if (optimize_size)
321    return true;
322  if (!fun || !fun->decl)
323    return false;
324
325  cgraph_node *n = cgraph_node::get (fun->decl);
326  return n && n->optimize_for_size_p ();
327}
328
/* Return true when the function FUN should always be optimized for speed.  */
330
331bool
332optimize_function_for_speed_p (struct function *fun)
333{
334  return !optimize_function_for_size_p (fun);
335}
336
337/* Return TRUE when BB should be optimized for size.  */
338
339bool
340optimize_bb_for_size_p (const_basic_block bb)
341{
342  return optimize_function_for_size_p (cfun) || !maybe_hot_bb_p (cfun, bb);
343}
344
345/* Return TRUE when BB should be optimized for speed.  */
346
347bool
348optimize_bb_for_speed_p (const_basic_block bb)
349{
350  return !optimize_bb_for_size_p (bb);
351}
352
/* Return TRUE when edge E should be optimized for size.  */
354
355bool
356optimize_edge_for_size_p (edge e)
357{
358  return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
359}
360
/* Return TRUE when edge E should be optimized for speed.  */
362
363bool
364optimize_edge_for_speed_p (edge e)
365{
366  return !optimize_edge_for_size_p (e);
367}
368
/* Return TRUE when the instructions currently being expanded should be
   optimized for size.  */
370
371bool
372optimize_insn_for_size_p (void)
373{
374  return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
375}
376
/* Return TRUE when the instructions currently being expanded should be
   optimized for speed.  */
378
379bool
380optimize_insn_for_speed_p (void)
381{
382  return !optimize_insn_for_size_p ();
383}
384
385/* Return TRUE when LOOP should be optimized for size.  */
386
387bool
388optimize_loop_for_size_p (struct loop *loop)
389{
390  return optimize_bb_for_size_p (loop->header);
391}
392
393/* Return TRUE when LOOP should be optimized for speed.  */
394
395bool
396optimize_loop_for_speed_p (struct loop *loop)
397{
398  return optimize_bb_for_speed_p (loop->header);
399}
400
401/* Return TRUE when LOOP nest should be optimized for speed.  */
402
403bool
404optimize_loop_nest_for_speed_p (struct loop *loop)
405{
406  struct loop *l = loop;
407  if (optimize_loop_for_speed_p (loop))
408    return true;
409  l = loop->inner;
410  while (l && l != loop)
411    {
412      if (optimize_loop_for_speed_p (l))
413        return true;
414      if (l->inner)
415        l = l->inner;
416      else if (l->next)
417        l = l->next;
418      else
419        {
420	  while (l != loop && !l->next)
421	    l = loop_outer (l);
422	  if (l != loop)
423	    l = l->next;
424	}
425    }
426  return false;
427}
428
429/* Return TRUE when LOOP nest should be optimized for size.  */
430
431bool
432optimize_loop_nest_for_size_p (struct loop *loop)
433{
434  return !optimize_loop_nest_for_speed_p (loop);
435}
436
/* Return true when edge E is likely to be well predicted by the CPU's
   branch predictor.  */
439
440bool
441predictable_edge_p (edge e)
442{
443  if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
444    return false;
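  /* With --param predictable-branch-outcome=N (2 by default, see params.def;
     the default value is an assumption here), an edge counts as predictable
     when its probability is at most N% or at least (100 - N)%.  */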
445  if ((e->probability
446       <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
447      || (REG_BR_PROB_BASE - e->probability
448          <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
449    return true;
450  return false;
451}
452
453
454/* Set RTL expansion for BB profile.  */
455
456void
457rtl_profile_for_bb (basic_block bb)
458{
459  crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
460}
461
462/* Set RTL expansion for edge profile.  */
463
464void
465rtl_profile_for_edge (edge e)
466{
467  crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
468}
469
470/* Set RTL expansion to default mode (i.e. when profile info is not known).  */
471void
472default_rtl_profile (void)
473{
474  crtl->maybe_hot_insn_p = true;
475}
476
/* Return true if one of the outgoing edges of BB is already predicted by
   PREDICTOR.  */
479
480bool
481rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
482{
483  rtx note;
484  if (!INSN_P (BB_END (bb)))
485    return false;
486  for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
487    if (REG_NOTE_KIND (note) == REG_BR_PRED
488	&& INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
489      return true;
490  return false;
491}
492
/* This map contains, for each basic block, the list of predictions for its
   outgoing edges.  */
495
496static struct pointer_map_t *bb_predictions;
497
/* Structure representing predictions at the tree level.  */
499
500struct edge_prediction {
501    struct edge_prediction *ep_next;
502    edge ep_edge;
503    enum br_predictor ep_predictor;
504    int ep_probability;
505};
506
/* Return true if one of the outgoing edges of BB is already predicted by
   PREDICTOR.  */
509
510bool
511gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
512{
513  struct edge_prediction *i;
514  void **preds = pointer_map_contains (bb_predictions, bb);
515
516  if (!preds)
517    return false;
518
519  for (i = (struct edge_prediction *) *preds; i; i = i->ep_next)
520    if (i->ep_predictor == predictor)
521      return true;
522  return false;
523}
524
/* Return true when the probability of an edge is reliable.

   The profile guessing code is good at predicting the branch outcome (i.e.
   taken/not taken), which is predicted right slightly over 75% of the time.
   It is however notoriously poor at predicting the probability itself.
   In general the guessed profile appears a lot flatter (with probabilities
   closer to 50%) than reality, so it is a bad idea to use it to drive
   optimizations such as those disabling dynamic branch prediction for well
   predictable branches.

   There are two exceptions - edges leading to noreturn calls and edges
   predicted by the number-of-iterations heuristics are predicted well.
   This predicate should be able to distinguish those, but at the moment
   it simply checks for the noreturn heuristic, which is the only one giving
   a probability over 99% or below 1%.  In the future we might want to
   propagate reliability information across the CFG if we find this
   information useful in multiple places.  */
541static bool
542probability_reliable_p (int prob)
543{
544  return (profile_status_for_fn (cfun) == PROFILE_READ
545	  || (profile_status_for_fn (cfun) == PROFILE_GUESSED
546	      && (prob <= HITRATE (1) || prob >= HITRATE (99))));
547}
548
549/* Same predicate as above, working on edges.  */
550bool
551edge_probability_reliable_p (const_edge e)
552{
553  return probability_reliable_p (e->probability);
554}
555
556/* Same predicate as edge_probability_reliable_p, working on notes.  */
557bool
558br_prob_note_reliable_p (const_rtx note)
559{
560  gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
561  return probability_reliable_p (XINT (note, 0));
562}
563
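/* Record on conditional jump INSN that PREDICTOR predicts it to be taken
   with PROBABILITY (on the REG_BR_PROB_BASE scale) by attaching
   a REG_BR_PRED note.  */
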
564static void
565predict_insn (rtx insn, enum br_predictor predictor, int probability)
566{
567  gcc_assert (any_condjump_p (insn));
568  if (!flag_guess_branch_prob)
569    return;
570
571  add_reg_note (insn, REG_BR_PRED,
572		gen_rtx_CONCAT (VOIDmode,
573				GEN_INT ((int) predictor),
574				GEN_INT ((int) probability)));
575}
576
577/* Predict insn by given predictor.  */
578
579void
580predict_insn_def (rtx insn, enum br_predictor predictor,
581		  enum prediction taken)
582{
583   int probability = predictor_info[(int) predictor].hitrate;
584
585   if (taken != TAKEN)
586     probability = REG_BR_PROB_BASE - probability;
587
588   predict_insn (insn, predictor, probability);
589}
590
591/* Predict edge E with given probability if possible.  */
592
593void
594rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
595{
596  rtx last_insn;
597  last_insn = BB_END (e->src);
598
599  /* We can store the branch prediction information only about
600     conditional jumps.  */
601  if (!any_condjump_p (last_insn))
602    return;
603
604  /* We always store probability of branching.  */
605  if (e->flags & EDGE_FALLTHRU)
606    probability = REG_BR_PROB_BASE - probability;
607
608  predict_insn (last_insn, predictor, probability);
609}
610
611/* Predict edge E with the given PROBABILITY.  */
612void
613gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
614{
615  gcc_assert (profile_status_for_fn (cfun) != PROFILE_GUESSED);
  if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
       && EDGE_COUNT (e->src->succs) > 1)
      && flag_guess_branch_prob && optimize)
619    {
620      struct edge_prediction *i = XNEW (struct edge_prediction);
621      void **preds = pointer_map_insert (bb_predictions, e->src);
622
623      i->ep_next = (struct edge_prediction *) *preds;
624      *preds = i;
625      i->ep_probability = probability;
626      i->ep_predictor = predictor;
627      i->ep_edge = e;
628    }
629}
630
/* Remove all predictions that are attached to edge E from the prediction
   list of its source basic block.  */
633void
634remove_predictions_associated_with_edge (edge e)
635{
636  void **preds;
637
638  if (!bb_predictions)
639    return;
640
641  preds = pointer_map_contains (bb_predictions, e->src);
642
643  if (preds)
644    {
645      struct edge_prediction **prediction = (struct edge_prediction **) preds;
646      struct edge_prediction *next;
647
648      while (*prediction)
649	{
650	  if ((*prediction)->ep_edge == e)
651	    {
652	      next = (*prediction)->ep_next;
653	      free (*prediction);
654	      *prediction = next;
655	    }
656	  else
657	    prediction = &((*prediction)->ep_next);
658	}
659    }
660}
661
662/* Clears the list of predictions stored for BB.  */
663
664static void
665clear_bb_predictions (basic_block bb)
666{
667  void **preds = pointer_map_contains (bb_predictions, bb);
668  struct edge_prediction *pred, *next;
669
670  if (!preds)
671    return;
672
673  for (pred = (struct edge_prediction *) *preds; pred; pred = next)
674    {
675      next = pred->ep_next;
676      free (pred);
677    }
678  *preds = NULL;
679}
680
/* Return true when we can store a prediction on insn INSN.
   At the moment we represent predictions only on conditional
   jumps, not on computed jumps or other complicated cases.  */
684static bool
685can_predict_insn_p (const_rtx insn)
686{
687  return (JUMP_P (insn)
688	  && any_condjump_p (insn)
689	  && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
690}
691
692/* Predict edge E by given predictor if possible.  */
693
694void
695predict_edge_def (edge e, enum br_predictor predictor,
696		  enum prediction taken)
697{
698   int probability = predictor_info[(int) predictor].hitrate;
699
700   if (taken != TAKEN)
701     probability = REG_BR_PROB_BASE - probability;
702
703   predict_edge (e, predictor, probability);
704}
705
706/* Invert all branch predictions or probability notes in the INSN.  This needs
707   to be done each time we invert the condition used by the jump.  */
708
709void
710invert_br_probabilities (rtx insn)
711{
712  rtx note;
713
714  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
715    if (REG_NOTE_KIND (note) == REG_BR_PROB)
716      XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
717    else if (REG_NOTE_KIND (note) == REG_BR_PRED)
718      XEXP (XEXP (note, 0), 1)
719	= GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
720}
721
722/* Dump information about the branch prediction to the output file.  */
723
724static void
725dump_prediction (FILE *file, enum br_predictor predictor, int probability,
726		 basic_block bb, int used)
727{
728  edge e;
729  edge_iterator ei;
730
731  if (!file)
732    return;
733
734  FOR_EACH_EDGE (e, ei, bb->succs)
735    if (! (e->flags & EDGE_FALLTHRU))
736      break;
737
738  fprintf (file, "  %s heuristics%s: %.1f%%",
739	   predictor_info[predictor].name,
740	   used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
741
742  if (bb->count)
743    {
744      fprintf (file, "  exec %"PRId64, bb->count);
745      if (e)
746	{
747	  fprintf (file, " hit %"PRId64, e->count);
748	  fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
749	}
750    }
751
752  fprintf (file, "\n");
753}
754
/* We cannot predict the probabilities of the outgoing edges of BB.  Set them
   evenly and hope for the best.  */
757static void
758set_even_probabilities (basic_block bb)
759{
760  int nedges = 0;
761  edge e;
762  edge_iterator ei;
763
764  FOR_EACH_EDGE (e, ei, bb->succs)
765    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
766      nedges ++;
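  /* Split REG_BR_PROB_BASE evenly among the non-EH, non-fake edges; the
     "+ nedges / 2" term rounds the division to the nearest value.  */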
767  FOR_EACH_EDGE (e, ei, bb->succs)
768    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
769      e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
770    else
771      e->probability = 0;
772}
773
/* Combine all REG_BR_PRED notes into a single probability and attach a
   REG_BR_PROB note if not already present.  Remove the now useless
   REG_BR_PRED notes.  */
776
777static void
778combine_predictions_for_insn (rtx insn, basic_block bb)
779{
780  rtx prob_note;
781  rtx *pnote;
782  rtx note;
783  int best_probability = PROB_EVEN;
784  enum br_predictor best_predictor = END_PREDICTORS;
785  int combined_probability = REG_BR_PROB_BASE / 2;
786  int d;
787  bool first_match = false;
788  bool found = false;
789
790  if (!can_predict_insn_p (insn))
791    {
792      set_even_probabilities (bb);
793      return;
794    }
795
796  prob_note = find_reg_note (insn, REG_BR_PROB, 0);
797  pnote = &REG_NOTES (insn);
798  if (dump_file)
799    fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
800	     bb->index);
801
  /* We implement "first match" heuristics and use the probability guessed
     by the predictor with the smallest index.  */
804  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
805    if (REG_NOTE_KIND (note) == REG_BR_PRED)
806      {
807	enum br_predictor predictor = ((enum br_predictor)
808				       INTVAL (XEXP (XEXP (note, 0), 0)));
809	int probability = INTVAL (XEXP (XEXP (note, 0), 1));
810
811	found = true;
812	if (best_predictor > predictor)
813	  best_probability = probability, best_predictor = predictor;
814
815	d = (combined_probability * probability
816	     + (REG_BR_PROB_BASE - combined_probability)
817	     * (REG_BR_PROB_BASE - probability));
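	/* This is a Dempster-Shafer style combination: as an illustrative
	   example, combining two independent 75% predictions (7500 out of
	   10000) gives d = 7500*7500 + 2500*2500 and a combined probability
	   of 7500*7500 * 10000 / d, i.e. 90%.  */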
818
819	/* Use FP math to avoid overflows of 32bit integers.  */
820	if (d == 0)
821	  /* If one probability is 0% and one 100%, avoid division by zero.  */
822	  combined_probability = REG_BR_PROB_BASE / 2;
823	else
824	  combined_probability = (((double) combined_probability) * probability
825				  * REG_BR_PROB_BASE / d + 0.5);
826      }
827
  /* Decide which heuristic to use.  If we didn't match anything, use the
     no_prediction heuristic; if we did match, use either first match or
     Dempster-Shafer theory depending on the flags.  */
831
832  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
833    first_match = true;
834
835  if (!found)
836    dump_prediction (dump_file, PRED_NO_PREDICTION,
837		     combined_probability, bb, true);
838  else
839    {
840      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
841		       bb, !first_match);
842      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
843		       bb, first_match);
844    }
845
846  if (first_match)
847    combined_probability = best_probability;
848  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
849
850  while (*pnote)
851    {
852      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
853	{
854	  enum br_predictor predictor = ((enum br_predictor)
855					 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
856	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
857
858	  dump_prediction (dump_file, predictor, probability, bb,
859			   !first_match || best_predictor == predictor);
860	  *pnote = XEXP (*pnote, 1);
861	}
862      else
863	pnote = &XEXP (*pnote, 1);
864    }
865
866  if (!prob_note)
867    {
868      add_int_reg_note (insn, REG_BR_PROB, combined_probability);
869
      /* Save the prediction into the CFG in case we are seeing a
	 non-degenerate conditional jump.  */
872      if (!single_succ_p (bb))
873	{
874	  BRANCH_EDGE (bb)->probability = combined_probability;
875	  FALLTHRU_EDGE (bb)->probability
876	    = REG_BR_PROB_BASE - combined_probability;
877	}
878    }
879  else if (!single_succ_p (bb))
880    {
881      int prob = XINT (prob_note, 0);
882
883      BRANCH_EDGE (bb)->probability = prob;
884      FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
885    }
886  else
887    single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
888}
889
/* Combine the predictions for BB into a single probability and store it into
   the CFG.  Remove the now useless prediction entries.  */
892
893static void
894combine_predictions_for_bb (basic_block bb)
895{
896  int best_probability = PROB_EVEN;
897  enum br_predictor best_predictor = END_PREDICTORS;
898  int combined_probability = REG_BR_PROB_BASE / 2;
899  int d;
900  bool first_match = false;
901  bool found = false;
902  struct edge_prediction *pred;
903  int nedges = 0;
904  edge e, first = NULL, second = NULL;
905  edge_iterator ei;
906  void **preds;
907
908  FOR_EACH_EDGE (e, ei, bb->succs)
909    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
910      {
911	nedges ++;
912	if (first && !second)
913	  second = e;
914	if (!first)
915	  first = e;
916      }
917
918  /* When there is no successor or only one choice, prediction is easy.
919
     We are lazy for now and predict only basic blocks with two outgoing
     edges.  It is possible to predict the generic case too, but we have to
     ignore the first match heuristics and do more involved combining.
     Implement this later.  */
924  if (nedges != 2)
925    {
926      if (!bb->count)
927	set_even_probabilities (bb);
928      clear_bb_predictions (bb);
929      if (dump_file)
930	fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
931		 nedges, bb->index);
932      return;
933    }
934
935  if (dump_file)
936    fprintf (dump_file, "Predictions for bb %i\n", bb->index);
937
938  preds = pointer_map_contains (bb_predictions, bb);
939  if (preds)
940    {
      /* We implement "first match" heuristics and use the probability guessed
	 by the predictor with the smallest index.  */
943      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
944	{
945	  enum br_predictor predictor = pred->ep_predictor;
946	  int probability = pred->ep_probability;
947
948	  if (pred->ep_edge != first)
949	    probability = REG_BR_PROB_BASE - probability;
950
951	  found = true;
	  /* First match heuristics would be wildly confused if we predicted
	     both directions.  */
954	  if (best_predictor > predictor)
955	    {
956              struct edge_prediction *pred2;
957	      int prob = probability;
958
959	      for (pred2 = (struct edge_prediction *) *preds;
960		   pred2; pred2 = pred2->ep_next)
961	       if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
962	         {
	           int probability2 = pred2->ep_probability;
964
965		   if (pred2->ep_edge != first)
966		     probability2 = REG_BR_PROB_BASE - probability2;
967
968		   if ((probability < REG_BR_PROB_BASE / 2) !=
969		       (probability2 < REG_BR_PROB_BASE / 2))
970		     break;
971
972		   /* If the same predictor later gave better result, go for it! */
973		   if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
974		       || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
975		     prob = probability2;
976		 }
977	      if (!pred2)
978	        best_probability = prob, best_predictor = predictor;
979	    }
980
981	  d = (combined_probability * probability
982	       + (REG_BR_PROB_BASE - combined_probability)
983	       * (REG_BR_PROB_BASE - probability));
984
985	  /* Use FP math to avoid overflows of 32bit integers.  */
986	  if (d == 0)
987	    /* If one probability is 0% and one 100%, avoid division by zero.  */
988	    combined_probability = REG_BR_PROB_BASE / 2;
989	  else
990	    combined_probability = (((double) combined_probability)
991				    * probability
992		    		    * REG_BR_PROB_BASE / d + 0.5);
993	}
994    }
995
  /* Decide which heuristic to use.  If we didn't match anything, use the
     no_prediction heuristic; if we did match, use either first match or
     Dempster-Shafer theory depending on the flags.  */
999
1000  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
1001    first_match = true;
1002
1003  if (!found)
1004    dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
1005  else
1006    {
1007      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
1008		       !first_match);
1009      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
1010		       first_match);
1011    }
1012
1013  if (first_match)
1014    combined_probability = best_probability;
1015  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
1016
1017  if (preds)
1018    {
1019      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
1020	{
1021	  enum br_predictor predictor = pred->ep_predictor;
1022	  int probability = pred->ep_probability;
1023
1024	  if (pred->ep_edge != EDGE_SUCC (bb, 0))
1025	    probability = REG_BR_PROB_BASE - probability;
1026	  dump_prediction (dump_file, predictor, probability, bb,
1027			   !first_match || best_predictor == predictor);
1028	}
1029    }
1030  clear_bb_predictions (bb);
1031
1032  if (!bb->count)
1033    {
1034      first->probability = combined_probability;
1035      second->probability = REG_BR_PROB_BASE - combined_probability;
1036    }
1037}
1038
1039/* Check if T1 and T2 satisfy the IV_COMPARE condition.
   Return the SSA_NAME if the condition is satisfied, NULL otherwise.
1041
1042   T1 and T2 should be one of the following cases:
1043     1. T1 is SSA_NAME, T2 is NULL
1044     2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1045     3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */
1046
1047static tree
1048strips_small_constant (tree t1, tree t2)
1049{
1050  tree ret = NULL;
1051  int value = 0;
1052
1053  if (!t1)
1054    return NULL;
1055  else if (TREE_CODE (t1) == SSA_NAME)
1056    ret = t1;
1057  else if (tree_fits_shwi_p (t1))
1058    value = tree_to_shwi (t1);
1059  else
1060    return NULL;
1061
1062  if (!t2)
1063    return ret;
1064  else if (tree_fits_shwi_p (t2))
1065    value = tree_to_shwi (t2);
1066  else if (TREE_CODE (t2) == SSA_NAME)
1067    {
1068      if (ret)
1069        return NULL;
1070      else
1071        ret = t2;
1072    }
1073
1074  if (value <= 4 && value >= -4)
1075    return ret;
1076  else
1077    return NULL;
1078}
1079
1080/* Return the SSA_NAME in T or T's operands.
1081   Return NULL if SSA_NAME cannot be found.  */
1082
1083static tree
1084get_base_value (tree t)
1085{
1086  if (TREE_CODE (t) == SSA_NAME)
1087    return t;
1088
1089  if (!BINARY_CLASS_P (t))
1090    return NULL;
1091
1092  switch (TREE_OPERAND_LENGTH (t))
1093    {
1094    case 1:
1095      return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1096    case 2:
1097      return strips_small_constant (TREE_OPERAND (t, 0),
1098				    TREE_OPERAND (t, 1));
1099    default:
1100      return NULL;
1101    }
1102}
1103
/* Check the compare STMT in LOOP.  If it compares an induction
   variable to a loop invariant, return true, and save
   LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
   Otherwise return false and set LOOP_INVARIANT to NULL.  */
1108
1109static bool
1110is_comparison_with_loop_invariant_p (gimple stmt, struct loop *loop,
1111				     tree *loop_invariant,
1112				     enum tree_code *compare_code,
1113				     tree *loop_step,
1114				     tree *loop_iv_base)
1115{
1116  tree op0, op1, bound, base;
1117  affine_iv iv0, iv1;
1118  enum tree_code code;
1119  tree step;
1120
1121  code = gimple_cond_code (stmt);
1122  *loop_invariant = NULL;
1123
1124  switch (code)
1125    {
1126    case GT_EXPR:
1127    case GE_EXPR:
1128    case NE_EXPR:
1129    case LT_EXPR:
1130    case LE_EXPR:
1131    case EQ_EXPR:
1132      break;
1133
1134    default:
1135      return false;
1136    }
1137
1138  op0 = gimple_cond_lhs (stmt);
1139  op1 = gimple_cond_rhs (stmt);
1140
1141  if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1142       || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1143    return false;
1144  if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1145    return false;
1146  if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1147    return false;
1148  if (TREE_CODE (iv0.step) != INTEGER_CST
1149      || TREE_CODE (iv1.step) != INTEGER_CST)
1150    return false;
1151  if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1152      || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1153    return false;
1154
1155  if (integer_zerop (iv0.step))
1156    {
1157      if (code != NE_EXPR && code != EQ_EXPR)
1158	code = invert_tree_comparison (code, false);
1159      bound = iv0.base;
1160      base = iv1.base;
1161      if (tree_fits_shwi_p (iv1.step))
1162	step = iv1.step;
1163      else
1164	return false;
1165    }
1166  else
1167    {
1168      bound = iv1.base;
1169      base = iv0.base;
1170      if (tree_fits_shwi_p (iv0.step))
1171	step = iv0.step;
1172      else
1173	return false;
1174    }
1175
1176  if (TREE_CODE (bound) != INTEGER_CST)
1177    bound = get_base_value (bound);
1178  if (!bound)
1179    return false;
1180  if (TREE_CODE (base) != INTEGER_CST)
1181    base = get_base_value (base);
1182  if (!base)
1183    return false;
1184
1185  *loop_invariant = bound;
1186  *compare_code = code;
1187  *loop_step = step;
1188  *loop_iv_base = base;
1189  return true;
1190}
1191
1192/* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */
1193
1194static bool
1195expr_coherent_p (tree t1, tree t2)
1196{
1197  gimple stmt;
1198  tree ssa_name_1 = NULL;
1199  tree ssa_name_2 = NULL;
1200
1201  gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1202  gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1203
1204  if (t1 == t2)
1205    return true;
1206
1207  if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1208    return true;
1209  if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1210    return false;
1211
1212  /* Check to see if t1 is expressed/defined with t2.  */
1213  stmt = SSA_NAME_DEF_STMT (t1);
1214  gcc_assert (stmt != NULL);
1215  if (is_gimple_assign (stmt))
1216    {
1217      ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1218      if (ssa_name_1 && ssa_name_1 == t2)
1219	return true;
1220    }
1221
1222  /* Check to see if t2 is expressed/defined with t1.  */
1223  stmt = SSA_NAME_DEF_STMT (t2);
1224  gcc_assert (stmt != NULL);
1225  if (is_gimple_assign (stmt))
1226    {
1227      ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1228      if (ssa_name_2 && ssa_name_2 == t1)
1229	return true;
1230    }
1231
1232  /* Compare if t1 and t2's def_stmts are identical.  */
1233  if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1234    return true;
1235  else
1236    return false;
1237}
1238
/* Predict the branch probability of BB when BB contains a branch that compares
   an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR.  The
   loop exit condition uses LOOP_BOUND_CODE, with a step of LOOP_BOUND_STEP.
1242
1243   E.g.
1244     for (int i = 0; i < bound; i++) {
1245       if (i < bound - 2)
1246	 computation_1();
1247       else
1248	 computation_2();
1249     }
1250
1251  In this loop, we will predict the branch inside the loop to be taken.  */
1252
1253static void
1254predict_iv_comparison (struct loop *loop, basic_block bb,
1255		       tree loop_bound_var,
1256		       tree loop_iv_base_var,
1257		       enum tree_code loop_bound_code,
1258		       int loop_bound_step)
1259{
1260  gimple stmt;
1261  tree compare_var, compare_base;
1262  enum tree_code compare_code;
1263  tree compare_step_var;
1264  edge then_edge;
1265  edge_iterator ei;
1266
1267  if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1268      || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
1269      || predicted_by_p (bb, PRED_LOOP_EXIT))
1270    return;
1271
1272  stmt = last_stmt (bb);
1273  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1274    return;
1275  if (!is_comparison_with_loop_invariant_p (stmt, loop, &compare_var,
1276					    &compare_code,
1277					    &compare_step_var,
1278					    &compare_base))
1279    return;
1280
1281  /* Find the taken edge.  */
1282  FOR_EACH_EDGE (then_edge, ei, bb->succs)
1283    if (then_edge->flags & EDGE_TRUE_VALUE)
1284      break;
1285
1286  /* When comparing an IV to a loop invariant, NE is more likely to be
1287     taken while EQ is more likely to be not-taken.  */
1288  if (compare_code == NE_EXPR)
1289    {
1290      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1291      return;
1292    }
1293  else if (compare_code == EQ_EXPR)
1294    {
1295      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1296      return;
1297    }
1298
1299  if (!expr_coherent_p (loop_iv_base_var, compare_base))
1300    return;
1301
1302  /* If loop bound, base and compare bound are all constants, we can
1303     calculate the probability directly.  */
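  /* As an illustrative example (not taken from the code below): in
     'for (i = 0; i < 100; i++) if (i < 30) ...' the loop iterates about
     100 times while the inner comparison holds for about 30 of them, so
     the then edge is given a probability of roughly 30%.  */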
1304  if (tree_fits_shwi_p (loop_bound_var)
1305      && tree_fits_shwi_p (compare_var)
1306      && tree_fits_shwi_p (compare_base))
1307    {
1308      int probability;
1309      bool overflow, overall_overflow = false;
1310      widest_int compare_count, tem;
1311
1312      /* (loop_bound - base) / compare_step */
1313      tem = wi::sub (wi::to_widest (loop_bound_var),
1314		     wi::to_widest (compare_base), SIGNED, &overflow);
1315      overall_overflow |= overflow;
1316      widest_int loop_count = wi::div_trunc (tem,
1317					     wi::to_widest (compare_step_var),
1318					     SIGNED, &overflow);
1319      overall_overflow |= overflow;
1320
1321      if (!wi::neg_p (wi::to_widest (compare_step_var))
1322          ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1323	{
1324	  /* (loop_bound - compare_bound) / compare_step */
1325	  tem = wi::sub (wi::to_widest (loop_bound_var),
1326			 wi::to_widest (compare_var), SIGNED, &overflow);
1327	  overall_overflow |= overflow;
1328	  compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1329					 SIGNED, &overflow);
1330	  overall_overflow |= overflow;
1331	}
1332      else
1333        {
1334	  /* (compare_bound - base) / compare_step */
1335	  tem = wi::sub (wi::to_widest (compare_var),
1336			 wi::to_widest (compare_base), SIGNED, &overflow);
1337	  overall_overflow |= overflow;
1338          compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1339					 SIGNED, &overflow);
1340	  overall_overflow |= overflow;
1341	}
1342      if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1343	++compare_count;
1344      if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1345	++loop_count;
1346      if (wi::neg_p (compare_count))
1347        compare_count = 0;
1348      if (wi::neg_p (loop_count))
1349        loop_count = 0;
1350      if (loop_count == 0)
1351	probability = 0;
1352      else if (wi::cmps (compare_count, loop_count) == 1)
1353	probability = REG_BR_PROB_BASE;
1354      else
1355        {
1356	  tem = compare_count * REG_BR_PROB_BASE;
1357	  tem = wi::udiv_trunc (tem, loop_count);
1358	  probability = tem.to_uhwi ();
1359	}
1360
1361      if (!overall_overflow)
1362        predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1363
1364      return;
1365    }
1366
1367  if (expr_coherent_p (loop_bound_var, compare_var))
1368    {
1369      if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1370	  && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1371	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1372      else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1373	       && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1374	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1375      else if (loop_bound_code == NE_EXPR)
1376	{
1377	  /* If the loop backedge condition is "(i != bound)", we do
1378	     the comparison based on the step of IV:
1379	     * step < 0 : backedge condition is like (i > bound)
1380	     * step > 0 : backedge condition is like (i < bound)  */
1381	  gcc_assert (loop_bound_step != 0);
1382	  if (loop_bound_step > 0
1383	      && (compare_code == LT_EXPR
1384		  || compare_code == LE_EXPR))
1385	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1386	  else if (loop_bound_step < 0
1387		   && (compare_code == GT_EXPR
1388		       || compare_code == GE_EXPR))
1389	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1390	  else
1391	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1392	}
1393      else
1394	/* The branch is predicted not-taken if loop_bound_code is
1395	   opposite with compare_code.  */
1396	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1397    }
1398  else if (expr_coherent_p (loop_iv_base_var, compare_var))
1399    {
1400      /* For cases like:
1401	   for (i = s; i < h; i++)
1402	     if (i > s + 2) ....
1403	 The branch should be predicted taken.  */
1404      if (loop_bound_step > 0
1405	  && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1406	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1407      else if (loop_bound_step < 0
1408	       && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1409	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1410      else
1411	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1412    }
1413}
1414
/* Predict for extra loop exits that will lead to EXIT_EDGE.  The extra loop
   exits result from short-circuit conditions that generate an
   iftmp.  E.g.:
1418
1419   if (foo() || global > 10)
1420     break;
1421
1422   This will be translated into:
1423
1424   BB3:
1425     loop header...
1426   BB4:
1427     if foo() goto BB6 else goto BB5
1428   BB5:
1429     if global > 10 goto BB6 else goto BB7
1430   BB6:
1431     goto BB7
1432   BB7:
1433     iftmp = (PHI 0(BB5), 1(BB6))
1434     if iftmp == 1 goto BB8 else goto BB3
1435   BB8:
1436     outside of the loop...
1437
   The edge BB7->BB8 is a loop exit because BB8 is outside of the loop.
   From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
   exits.  This function takes BB7->BB8 as input, finds the extra loop
   exits, and predicts them using PRED_LOOP_EXIT.  */
1442
1443static void
1444predict_extra_loop_exits (edge exit_edge)
1445{
1446  unsigned i;
1447  bool check_value_one;
1448  gimple phi_stmt;
1449  tree cmp_rhs, cmp_lhs;
1450  gimple cmp_stmt = last_stmt (exit_edge->src);
1451
1452  if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1453    return;
1454  cmp_rhs = gimple_cond_rhs (cmp_stmt);
1455  cmp_lhs = gimple_cond_lhs (cmp_stmt);
1456  if (!TREE_CONSTANT (cmp_rhs)
1457      || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1458    return;
1459  if (TREE_CODE (cmp_lhs) != SSA_NAME)
1460    return;
1461
1462  /* If check_value_one is true, only the phi_args with value '1' will lead
1463     to loop exit. Otherwise, only the phi_args with value '0' will lead to
1464     loop exit.  */
1465  check_value_one = (((integer_onep (cmp_rhs))
1466		    ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1467		    ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1468
1469  phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1470  if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI)
1471    return;
1472
1473  for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1474    {
1475      edge e1;
1476      edge_iterator ei;
1477      tree val = gimple_phi_arg_def (phi_stmt, i);
1478      edge e = gimple_phi_arg_edge (phi_stmt, i);
1479
1480      if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1481	continue;
1482      if ((check_value_one ^ integer_onep (val)) == 1)
1483	continue;
1484      if (EDGE_COUNT (e->src->succs) != 1)
1485	{
1486	  predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
1487	  continue;
1488	}
1489
1490      FOR_EACH_EDGE (e1, ei, e->src->preds)
1491	predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
1492    }
1493}
1494
1495/* Predict edge probabilities by exploiting loop structure.  */
1496
1497static void
1498predict_loops (void)
1499{
1500  struct loop *loop;
1501
1502  /* Try to predict out blocks in a loop that are not part of a
1503     natural loop.  */
1504  FOR_EACH_LOOP (loop, 0)
1505    {
1506      basic_block bb, *bbs;
1507      unsigned j, n_exits;
1508      vec<edge> exits;
1509      struct tree_niter_desc niter_desc;
1510      edge ex;
1511      struct nb_iter_bound *nb_iter;
1512      enum tree_code loop_bound_code = ERROR_MARK;
1513      tree loop_bound_step = NULL;
1514      tree loop_bound_var = NULL;
1515      tree loop_iv_base = NULL;
1516      gimple stmt = NULL;
1517
1518      exits = get_loop_exit_edges (loop);
1519      n_exits = exits.length ();
1520      if (!n_exits)
1521	{
1522          exits.release ();
1523	  continue;
1524	}
1525
1526      FOR_EACH_VEC_ELT (exits, j, ex)
1527	{
1528	  tree niter = NULL;
1529	  HOST_WIDE_INT nitercst;
1530	  int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1531	  int probability;
1532	  enum br_predictor predictor;
1533
1534	  predict_extra_loop_exits (ex);
1535
1536	  if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1537	    niter = niter_desc.niter;
1538	  if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1539	    niter = loop_niter_by_eval (loop, ex);
1540
1541	  if (TREE_CODE (niter) == INTEGER_CST)
1542	    {
1543	      if (tree_fits_uhwi_p (niter)
1544		  && max
1545		  && compare_tree_int (niter, max - 1) == -1)
1546		nitercst = tree_to_uhwi (niter) + 1;
1547	      else
1548		nitercst = max;
1549	      predictor = PRED_LOOP_ITERATIONS;
1550	    }
1551	  /* If we have just one exit and we can derive some information about
1552	     the number of iterations of the loop from the statements inside
1553	     the loop, use it to predict this exit.  */
1554	  else if (n_exits == 1)
1555	    {
1556	      nitercst = estimated_stmt_executions_int (loop);
1557	      if (nitercst < 0)
1558		continue;
1559	      if (nitercst > max)
1560		nitercst = max;
1561
1562	      predictor = PRED_LOOP_ITERATIONS_GUESSED;
1563	    }
1564	  else
1565	    continue;
1566
1567	  /* If the prediction for number of iterations is zero, do not
1568	     predict the exit edges.  */
1569	  if (nitercst == 0)
1570	    continue;
1571
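	  /* An exit from a loop expected to iterate NITERCST times is taken
	     roughly once per NITERCST iterations, e.g. about 10% of the time
	     for a loop predicted to iterate 10 times (rounded to nearest).  */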
1572	  probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
1573	  predict_edge (ex, predictor, probability);
1574	}
1575      exits.release ();
1576
1577      /* Find information about loop bound variables.  */
1578      for (nb_iter = loop->bounds; nb_iter;
1579	   nb_iter = nb_iter->next)
1580	if (nb_iter->stmt
1581	    && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1582	  {
1583	    stmt = nb_iter->stmt;
1584	    break;
1585	  }
1586      if (!stmt && last_stmt (loop->header)
1587	  && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1588	stmt = last_stmt (loop->header);
1589      if (stmt)
1590	is_comparison_with_loop_invariant_p (stmt, loop,
1591					     &loop_bound_var,
1592					     &loop_bound_code,
1593					     &loop_bound_step,
1594					     &loop_iv_base);
1595
1596      bbs = get_loop_body (loop);
1597
1598      for (j = 0; j < loop->num_nodes; j++)
1599	{
1600	  int header_found = 0;
1601	  edge e;
1602	  edge_iterator ei;
1603
1604	  bb = bbs[j];
1605
	  /* Bypass loop heuristics on continue statements.  These
	     statements construct loops via "non-loop" constructs
	     in the source language and are better handled
	     separately.  */
1610	  if (predicted_by_p (bb, PRED_CONTINUE))
1611	    continue;
1612
1613	  /* Loop branch heuristics - predict an edge back to a
1614	     loop's head as taken.  */
1615	  if (bb == loop->latch)
1616	    {
1617	      e = find_edge (loop->latch, loop->header);
1618	      if (e)
1619		{
1620		  header_found = 1;
1621		  predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
1622		}
1623	    }
1624
	  /* Loop exit heuristics - predict an edge exiting the loop as not
	     taken if the conditional has no successor that is the loop
	     header.  */
1627	  if (!header_found
1628	      /* If we already used more reliable loop exit predictors, do not
1629		 bother with PRED_LOOP_EXIT.  */
1630	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1631	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
1632	    {
	      /* For loops with many exits we don't want to predict all exits
	         with a pretty large probability, because if all exits are
		 considered in a row, the loop would be predicted to iterate
		 almost never.  The code dividing the probability by the
		 number of exits is very rough.  It should compute the number
		 of exits taken on each path through the function (not the
		 overall number of exits, which might be a lot higher for
		 loops with wide switch statements in them) and compute the
		 n-th root of it.

		 We limit the minimal probability to 2% to avoid
		 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
		 as this was causing a regression in the perl benchmark
		 containing such a wide loop.  */
1646
1647	      int probability = ((REG_BR_PROB_BASE
1648		                  - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1649				 / n_exits);
1650	      if (probability < HITRATE (2))
1651		probability = HITRATE (2);
1652	      FOR_EACH_EDGE (e, ei, bb->succs)
1653		if (e->dest->index < NUM_FIXED_BLOCKS
1654		    || !flow_bb_inside_loop_p (loop, e->dest))
1655		  predict_edge (e, PRED_LOOP_EXIT, probability);
1656	    }
1657	  if (loop_bound_var)
1658	    predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1659				   loop_bound_code,
1660				   tree_to_shwi (loop_bound_step));
1661	}
1662
1663      /* Free basic blocks from get_loop_body.  */
1664      free (bbs);
1665    }
1666}
1667
1668/* Attempt to predict probabilities of BB outgoing edges using local
1669   properties.  */
1670static void
1671bb_estimate_probability_locally (basic_block bb)
1672{
1673  rtx last_insn = BB_END (bb);
1674  rtx cond;
1675
1676  if (! can_predict_insn_p (last_insn))
1677    return;
1678  cond = get_condition (last_insn, NULL, false, false);
1679  if (! cond)
1680    return;
1681
1682  /* Try "pointer heuristic."
1683     A comparison ptr == 0 is predicted as false.
1684     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
1685  if (COMPARISON_P (cond)
1686      && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1687	  || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1688    {
1689      if (GET_CODE (cond) == EQ)
1690	predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1691      else if (GET_CODE (cond) == NE)
1692	predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1693    }
1694  else
1695
1696  /* Try "opcode heuristic."
1697     EQ tests are usually false and NE tests are usually true. Also,
1698     most quantities are positive, so we can make the appropriate guesses
1699     about signed comparisons against zero.  */
1700    switch (GET_CODE (cond))
1701      {
1702      case CONST_INT:
1703	/* Unconditional branch.  */
1704	predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1705			  cond == const0_rtx ? NOT_TAKEN : TAKEN);
1706	break;
1707
1708      case EQ:
1709      case UNEQ:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of equality
	   tests in FP code.  */
1713	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1714	  ;
1715	/* Comparisons with 0 are often used for booleans and there is
1716	   nothing useful to predict about them.  */
1717	else if (XEXP (cond, 1) == const0_rtx
1718		 || XEXP (cond, 0) == const0_rtx)
1719	  ;
1720	else
1721	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1722	break;
1723
1724      case NE:
1725      case LTGT:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of equality
	   tests in FP code.  */
1729	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1730	  ;
1731	/* Comparisons with 0 are often used for booleans and there is
1732	   nothing useful to predict about them.  */
1733	else if (XEXP (cond, 1) == const0_rtx
1734		 || XEXP (cond, 0) == const0_rtx)
1735	  ;
1736	else
1737	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1738	break;
1739
1740      case ORDERED:
1741	predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1742	break;
1743
1744      case UNORDERED:
1745	predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1746	break;
1747
1748      case LE:
1749      case LT:
1750	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1751	    || XEXP (cond, 1) == constm1_rtx)
1752	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1753	break;
1754
1755      case GE:
1756      case GT:
1757	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1758	    || XEXP (cond, 1) == constm1_rtx)
1759	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1760	break;
1761
1762      default:
1763	break;
1764      }
1765}
1766
1767/* Set edge->probability for each successor edge of BB.  */
1768void
1769guess_outgoing_edge_probabilities (basic_block bb)
1770{
1771  bb_estimate_probability_locally (bb);
1772  combine_predictions_for_insn (BB_END (bb), bb);
1773}
1774
1775static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);
1776
1777/* Helper function for expr_expected_value.  */
1778
1779static tree
1780expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1781		       tree op1, bitmap visited, enum br_predictor *predictor)
1782{
1783  gimple def;
1784
1785  if (predictor)
1786    *predictor = PRED_UNCONDITIONAL;
1787
1788  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1789    {
1790      if (TREE_CONSTANT (op0))
1791	return op0;
1792
1793      if (code != SSA_NAME)
1794	return NULL_TREE;
1795
1796      def = SSA_NAME_DEF_STMT (op0);
1797
1798      /* If we were already here, break the infinite cycle.  */
1799      if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1800	return NULL;
1801
1802      if (gimple_code (def) == GIMPLE_PHI)
1803	{
	  /* All the arguments of the PHI node must have the same expected
	     constant value.  */
1806	  int i, n = gimple_phi_num_args (def);
1807	  tree val = NULL, new_val;
1808
1809	  for (i = 0; i < n; i++)
1810	    {
1811	      tree arg = PHI_ARG_DEF (def, i);
1812	      enum br_predictor predictor2;
1813
	      /* If this PHI has itself as an argument, we cannot
		 determine the expected value of this argument.  However,
		 if we can find an expected constant value for the other
		 PHI args then we can still be sure that this is
		 likely a constant.  So be optimistic and just
		 continue with the next argument.  */
1820	      if (arg == PHI_RESULT (def))
1821		continue;
1822
1823	      new_val = expr_expected_value (arg, visited, &predictor2);
1824
	      /* It is difficult to combine value predictors.  Simply assume
		 that the later predictor is weaker and take its prediction.  */
1827	      if (predictor && *predictor < predictor2)
1828		*predictor = predictor2;
1829	      if (!new_val)
1830		return NULL;
1831	      if (!val)
1832		val = new_val;
1833	      else if (!operand_equal_p (val, new_val, false))
1834		return NULL;
1835	    }
1836	  return val;
1837	}
1838      if (is_gimple_assign (def))
1839	{
1840	  if (gimple_assign_lhs (def) != op0)
1841	    return NULL;
1842
1843	  return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1844					gimple_assign_rhs1 (def),
1845					gimple_assign_rhs_code (def),
1846					gimple_assign_rhs2 (def),
1847					visited, predictor);
1848	}
1849
1850      if (is_gimple_call (def))
1851	{
1852	  tree decl = gimple_call_fndecl (def);
1853	  if (!decl)
1854	    {
1855	      if (gimple_call_internal_p (def)
1856		  && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
1857		{
1858		  gcc_assert (gimple_call_num_args (def) == 3);
1859		  tree val = gimple_call_arg (def, 0);
1860		  if (TREE_CONSTANT (val))
1861		    return val;
1862		  if (predictor)
1863		    {
1864		      *predictor = PRED_BUILTIN_EXPECT;
1865		      tree val2 = gimple_call_arg (def, 2);
1866		      gcc_assert (TREE_CODE (val2) == INTEGER_CST
1867				  && tree_fits_uhwi_p (val2)
1868				  && tree_to_uhwi (val2) < END_PREDICTORS);
1869		      *predictor = (enum br_predictor) tree_to_uhwi (val2);
1870		    }
1871		  return gimple_call_arg (def, 1);
1872		}
1873	      return NULL;
1874	    }
1875	  if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
1876	    switch (DECL_FUNCTION_CODE (decl))
1877	      {
1878	      case BUILT_IN_EXPECT:
1879		{
1880		  tree val;
1881		  if (gimple_call_num_args (def) != 2)
1882		    return NULL;
1883		  val = gimple_call_arg (def, 0);
1884		  if (TREE_CONSTANT (val))
1885		    return val;
1886		  if (predictor)
1887		    *predictor = PRED_BUILTIN_EXPECT;
1888		  return gimple_call_arg (def, 1);
1889		}
1890
1891	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
1892	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
1893	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
1894	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
1895	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
1896	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
1897	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
1898	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
1899	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
1900	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
1901	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
1902	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
1903	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
1904		/* Assume that any given atomic operation has low contention,
1905		   and thus the compare-and-swap operation succeeds.  */
1906		if (predictor)
1907		  *predictor = PRED_COMPARE_AND_SWAP;
1908		return boolean_true_node;
1909	    }
1910	}
1911
1912      return NULL;
1913    }
1914
1915  if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1916    {
1917      tree res;
1918      enum br_predictor predictor2;
1919      op0 = expr_expected_value (op0, visited, predictor);
1920      if (!op0)
1921	return NULL;
1922      op1 = expr_expected_value (op1, visited, &predictor2);
1923      if (predictor && *predictor < predictor2)
1924	*predictor = predictor2;
1925      if (!op1)
1926	return NULL;
1927      res = fold_build2 (code, type, op0, op1);
1928      if (TREE_CONSTANT (res))
1929	return res;
1930      return NULL;
1931    }
1932  if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1933    {
1934      tree res;
1935      op0 = expr_expected_value (op0, visited, predictor);
1936      if (!op0)
1937	return NULL;
1938      res = fold_build1 (code, type, op0);
1939      if (TREE_CONSTANT (res))
1940	return res;
1941      return NULL;
1942    }
1943  return NULL;
1944}
1945
/* Return the constant value EXPR will likely have at execution time, or
   NULL if unknown.  The function is used by the builtin_expect branch
   predictor, so the evidence must come from this construct and any
   additional constant folding it enables.

   We may want to implement a more involved value guess (such as value range
   propagation based prediction), but such tricks shall go to a new
   implementation.  */
1953
1954static tree
1955expr_expected_value (tree expr, bitmap visited,
1956		     enum br_predictor *predictor)
1957{
1958  enum tree_code code;
1959  tree op0, op1;
1960
1961  if (TREE_CONSTANT (expr))
1962    {
1963      if (predictor)
1964	*predictor = PRED_UNCONDITIONAL;
1965      return expr;
1966    }
1967
1968  extract_ops_from_tree (expr, &code, &op0, &op1);
1969  return expr_expected_value_1 (TREE_TYPE (expr),
1970				op0, code, op1, visited, predictor);
1971}
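
/* Purely illustrative: for hypothetical user code such as

     if (__builtin_expect (x > limit, 0))
       overflow_handler ();

   the condition reaches GIMPLE roughly as

     t_1 = x > limit;
     t_2 = __builtin_expect (t_1, 0);
     if (t_2 != 0) goto <overflow>; else goto <fallthru>;

   and expr_expected_value above chases the SSA definition of t_2 to the
   __builtin_expect call; since its first argument is not constant, the
   second argument (0) is returned as the expected value and the predictor
   is set to PRED_BUILTIN_EXPECT, letting tree_predict_by_opcode below mark
   the guarded branch as unlikely.  */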
1972
/* Predict using the opcode of the last statement in basic block BB.  */
1974static void
1975tree_predict_by_opcode (basic_block bb)
1976{
1977  gimple stmt = last_stmt (bb);
1978  edge then_edge;
1979  tree op0, op1;
1980  tree type;
1981  tree val;
1982  enum tree_code cmp;
1983  bitmap visited;
1984  edge_iterator ei;
1985  enum br_predictor predictor;
1986
1987  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1988    return;
1989  FOR_EACH_EDGE (then_edge, ei, bb->succs)
1990    if (then_edge->flags & EDGE_TRUE_VALUE)
1991      break;
1992  op0 = gimple_cond_lhs (stmt);
1993  op1 = gimple_cond_rhs (stmt);
1994  cmp = gimple_cond_code (stmt);
1995  type = TREE_TYPE (op0);
1996  visited = BITMAP_ALLOC (NULL);
1997  val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,
1998			       &predictor);
1999  BITMAP_FREE (visited);
2000  if (val && TREE_CODE (val) == INTEGER_CST)
2001    {
2002      if (predictor == PRED_BUILTIN_EXPECT)
2003	{
2004	  int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
2005
2006	  gcc_assert (percent >= 0 && percent <= 100);
2007	  if (integer_zerop (val))
2008	    percent = 100 - percent;
2009	  predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
2010	}
2011      else
2012	predict_edge (then_edge, predictor,
2013		      integer_zerop (val) ? NOT_TAKEN : TAKEN);
2014    }
2015  /* Try "pointer heuristic."
2016     A comparison ptr == 0 is predicted as false.
2017     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
2018  if (POINTER_TYPE_P (type))
2019    {
2020      if (cmp == EQ_EXPR)
2021	predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
2022      else if (cmp == NE_EXPR)
2023	predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
2024    }
2025  else
2026
2027  /* Try "opcode heuristic."
2028     EQ tests are usually false and NE tests are usually true. Also,
2029     most quantities are positive, so we can make the appropriate guesses
2030     about signed comparisons against zero.  */
2031    switch (cmp)
2032      {
2033      case EQ_EXPR:
2034      case UNEQ_EXPR:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of = tests in
	   FP code.  */
2038	if (FLOAT_TYPE_P (type))
2039	  ;
2040	/* Comparisons with 0 are often used for booleans and there is
2041	   nothing useful to predict about them.  */
2042	else if (integer_zerop (op0) || integer_zerop (op1))
2043	  ;
2044	else
2045	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2046	break;
2047
2048      case NE_EXPR:
2049      case LTGT_EXPR:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of = tests in
	   FP code.  */
2053	if (FLOAT_TYPE_P (type))
2054	  ;
2055	/* Comparisons with 0 are often used for booleans and there is
2056	   nothing useful to predict about them.  */
2057	else if (integer_zerop (op0)
2058		 || integer_zerop (op1))
2059	  ;
2060	else
2061	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2062	break;
2063
2064      case ORDERED_EXPR:
2065	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2066	break;
2067
2068      case UNORDERED_EXPR:
2069	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2070	break;
2071
2072      case LE_EXPR:
2073      case LT_EXPR:
2074	if (integer_zerop (op1)
2075	    || integer_onep (op1)
2076	    || integer_all_onesp (op1)
2077	    || real_zerop (op1)
2078	    || real_onep (op1)
2079	    || real_minus_onep (op1))
2080	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2081	break;
2082
2083      case GE_EXPR:
2084      case GT_EXPR:
2085	if (integer_zerop (op1)
2086	    || integer_onep (op1)
2087	    || integer_all_onesp (op1)
2088	    || real_zerop (op1)
2089	    || real_onep (op1)
2090	    || real_minus_onep (op1))
2091	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2092	break;
2093
2094      default:
2095	break;
2096      }
2097}
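
/* Illustration of the pointer heuristic above (hypothetical user code;
   "struct node" and "process" are made-up names):

     struct node;
     extern void process (struct node *);

     void
     use (struct node *p, struct node *q)
     {
       if (p == 0)       <- EQ on pointers: PRED_TREE_POINTER, not taken
	 return;
       if (p != q)       <- NE on pointers: PRED_TREE_POINTER, taken
	 process (p);
     }

   Unlike the integer opcode heuristic, the pointer heuristic also fires for
   comparisons against zero, reflecting the assumption that pointers are
   usually non-null and usually distinct.  */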
2098
/* Try to guess whether the return value VAL represents an error code.  */
2100
2101static enum br_predictor
2102return_prediction (tree val, enum prediction *prediction)
2103{
2104  /* VOID.  */
2105  if (!val)
2106    return PRED_NO_PREDICTION;
2107  /* Different heuristics for pointers and scalars.  */
2108  if (POINTER_TYPE_P (TREE_TYPE (val)))
2109    {
2110      /* NULL is usually not returned.  */
2111      if (integer_zerop (val))
2112	{
2113	  *prediction = NOT_TAKEN;
2114	  return PRED_NULL_RETURN;
2115	}
2116    }
2117  else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2118    {
2119      /* Negative return values are often used to indicate
2120         errors.  */
2121      if (TREE_CODE (val) == INTEGER_CST
2122	  && tree_int_cst_sgn (val) < 0)
2123	{
2124	  *prediction = NOT_TAKEN;
2125	  return PRED_NEGATIVE_RETURN;
2126	}
      /* Constant return values seem to be commonly taken.
         Zero/one often represent booleans so exclude them from the
	 heuristics.  */
2130      if (TREE_CONSTANT (val)
2131	  && (!integer_zerop (val) && !integer_onep (val)))
2132	{
2133	  *prediction = TAKEN;
2134	  return PRED_CONST_RETURN;
2135	}
2136    }
2137  return PRED_NO_PREDICTION;
2138}
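
/* Illustration (hypothetical source; the names are made up):

     struct buf;
     extern struct buf *alloc_buf (unsigned long);

     struct buf *
     get_buf (unsigned long n, unsigned long max)
     {
       if (n == 0 || n > max)
	 return 0;               <- NULL return: PRED_NULL_RETURN
       return alloc_buf (n);     <- non-constant value: no prediction
     }

   Once the returns have been merged into a single return statement fed by a
   PHI node, apply_return_prediction below sees a zero PHI argument
   (predicted PRED_NULL_RETURN) next to an unpredicted one, so the paths
   returning NULL are marked as unlikely.  */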
2139
/* Find the basic block with the return expression and look for a possible
   return value, trying to apply the RETURN_PREDICTION heuristics.  */
2142static void
2143apply_return_prediction (void)
2144{
2145  gimple return_stmt = NULL;
2146  tree return_val;
2147  edge e;
2148  gimple phi;
2149  int phi_num_args, i;
2150  enum br_predictor pred;
2151  enum prediction direction;
2152  edge_iterator ei;
2153
2154  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2155    {
2156      return_stmt = last_stmt (e->src);
2157      if (return_stmt
2158	  && gimple_code (return_stmt) == GIMPLE_RETURN)
2159	break;
2160    }
2161  if (!e)
2162    return;
2163  return_val = gimple_return_retval (return_stmt);
2164  if (!return_val)
2165    return;
2166  if (TREE_CODE (return_val) != SSA_NAME
2167      || !SSA_NAME_DEF_STMT (return_val)
2168      || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2169    return;
2170  phi = SSA_NAME_DEF_STMT (return_val);
2171  phi_num_args = gimple_phi_num_args (phi);
2172  pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2173
  /* Avoid the degenerate case where all return values from the function
     belong to the same category (i.e. they are all positive constants)
     so we can hardly say something about them.  */
2177  for (i = 1; i < phi_num_args; i++)
2178    if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2179      break;
2180  if (i != phi_num_args)
2181    for (i = 0; i < phi_num_args; i++)
2182      {
2183	pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2184	if (pred != PRED_NO_PREDICTION)
2185	  predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2186				         direction);
2187      }
2188}
2189
/* Look for basic blocks that contain unlikely-to-happen events
   (such as noreturn calls) and mark all paths leading to the execution
   of these basic blocks as unlikely.  */
2193
2194static void
2195tree_bb_level_predictions (void)
2196{
2197  basic_block bb;
2198  bool has_return_edges = false;
2199  edge e;
2200  edge_iterator ei;
2201
2202  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2203    if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2204      {
2205        has_return_edges = true;
2206	break;
2207      }
2208
2209  apply_return_prediction ();
2210
2211  FOR_EACH_BB_FN (bb, cfun)
2212    {
2213      gimple_stmt_iterator gsi;
2214
2215      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2216	{
2217	  gimple stmt = gsi_stmt (gsi);
2218	  tree decl;
2219
2220	  if (is_gimple_call (stmt))
2221	    {
2222	      if ((gimple_call_flags (stmt) & ECF_NORETURN)
2223	          && has_return_edges)
2224		predict_paths_leading_to (bb, PRED_NORETURN,
2225					  NOT_TAKEN);
2226	      decl = gimple_call_fndecl (stmt);
2227	      if (decl
2228		  && lookup_attribute ("cold",
2229				       DECL_ATTRIBUTES (decl)))
2230		predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2231					  NOT_TAKEN);
2232	    }
2233	  else if (gimple_code (stmt) == GIMPLE_PREDICT)
2234	    {
2235	      predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2236					gimple_predict_outcome (stmt));
2237	      /* Keep GIMPLE_PREDICT around so early inlining will propagate
2238	         hints to callers.  */
2239	    }
2240	}
2241    }
2242}
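
/* Illustrative only (the identifiers below are hypothetical):

     extern void fatal (const char *) __attribute__ ((noreturn, cold));
     extern int do_parse (const char *);

     int
     parse (const char *s)
     {
       if (!s)
	 fatal ("null input");
       return do_parse (s);
     }

   Because fatal is noreturn (and the function still has a normal return
   edge elsewhere) and also carries the cold attribute, the loop above
   predicts all paths leading to the block containing the call as not taken,
   via PRED_NORETURN and PRED_COLD_FUNCTION.  */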
2243
2244#ifdef ENABLE_CHECKING
2245
2246/* Callback for pointer_map_traverse, asserts that the pointer map is
2247   empty.  */
2248
2249static bool
2250assert_is_empty (const void *key ATTRIBUTE_UNUSED, void **value,
2251		 void *data ATTRIBUTE_UNUSED)
2252{
2253  gcc_assert (!*value);
2254  return false;
2255}
2256#endif
2257
2258/* Predict branch probabilities and estimate profile for basic block BB.  */
2259
2260static void
2261tree_estimate_probability_bb (basic_block bb)
2262{
2263  edge e;
2264  edge_iterator ei;
2265  gimple last;
2266
2267  FOR_EACH_EDGE (e, ei, bb->succs)
2268    {
2269      /* Predict edges to user labels with attributes.  */
2270      if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
2271	{
2272	  gimple_stmt_iterator gi;
2273	  for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2274	    {
2275	      gimple stmt = gsi_stmt (gi);
2276	      tree decl;
2277
2278	      if (gimple_code (stmt) != GIMPLE_LABEL)
2279		break;
2280	      decl = gimple_label_label (stmt);
2281	      if (DECL_ARTIFICIAL (decl))
2282		continue;
2283
2284	      /* Finally, we have a user-defined label.  */
2285	      if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2286		predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2287	      else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2288		predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2289	    }
2290	}
2291
      /* Predict early returns to be probable, as we've already taken
	 care of error returns and other cases are often used for
	 fast paths through the function.

	 Since we've already removed the return statements, we are
	 looking for a CFG like:
2298
2299	 if (conditional)
2300	 {
2301	 ..
2302	 goto return_block
2303	 }
2304	 some other blocks
2305	 return_block:
2306	 return_stmt.  */
2307      if (e->dest != bb->next_bb
2308	  && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2309	  && single_succ_p (e->dest)
2310	  && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2311	  && (last = last_stmt (e->dest)) != NULL
2312	  && gimple_code (last) == GIMPLE_RETURN)
2313	{
2314	  edge e1;
2315	  edge_iterator ei1;
2316
2317	  if (single_succ_p (bb))
2318	    {
2319	      FOR_EACH_EDGE (e1, ei1, bb->preds)
2320		if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2321		    && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2322		    && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2323		  predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2324	    }
2325	  else
2326	    if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2327		&& !predicted_by_p (e->src, PRED_CONST_RETURN)
2328		&& !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2329	      predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2330	}
2331
      /* Look for a block we are guarding (i.e. we dominate it,
	 but it doesn't postdominate us).  */
2334      if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
2335	  && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2336	  && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2337	{
2338	  gimple_stmt_iterator bi;
2339
2340	  /* The call heuristic claims that a guarded function call
2341	     is improbable.  This is because such calls are often used
2342	     to signal exceptional situations such as printing error
2343	     messages.  */
2344	  for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2345	       gsi_next (&bi))
2346	    {
2347	      gimple stmt = gsi_stmt (bi);
2348	      if (is_gimple_call (stmt)
		  /* Constant and pure calls are hardly used to signal
		     something exceptional.  */
2351		  && gimple_has_side_effects (stmt))
2352		{
2353		  predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2354		  break;
2355		}
2356	    }
2357	}
2358    }
2359  tree_predict_by_opcode (bb);
2360}
2361
2362/* Predict branch probabilities and estimate profile of the tree CFG.
2363   This function can be called from the loop optimizers to recompute
2364   the profile information.  */
2365
2366void
2367tree_estimate_probability (void)
2368{
2369  basic_block bb;
2370
2371  add_noreturn_fake_exit_edges ();
2372  connect_infinite_loops_to_exit ();
2373  /* We use loop_niter_by_eval, which requires that the loops have
2374     preheaders.  */
2375  create_preheaders (CP_SIMPLE_PREHEADERS);
2376  calculate_dominance_info (CDI_POST_DOMINATORS);
2377
2378  bb_predictions = pointer_map_create ();
2379  tree_bb_level_predictions ();
2380  record_loop_exits ();
2381
2382  if (number_of_loops (cfun) > 1)
2383    predict_loops ();
2384
2385  FOR_EACH_BB_FN (bb, cfun)
2386    tree_estimate_probability_bb (bb);
2387
2388  FOR_EACH_BB_FN (bb, cfun)
2389    combine_predictions_for_bb (bb);
2390
2391#ifdef ENABLE_CHECKING
2392  pointer_map_traverse (bb_predictions, assert_is_empty, NULL);
2393#endif
2394  pointer_map_destroy (bb_predictions);
2395  bb_predictions = NULL;
2396
2397  estimate_bb_frequencies (false);
2398  free_dominance_info (CDI_POST_DOMINATORS);
2399  remove_fake_exit_edges ();
2400}
2401
/* Predict with PRED the edges to successors of CUR whose sources are not
   postdominated by BB, and recurse to all postdominators.  */
2404
2405static void
2406predict_paths_for_bb (basic_block cur, basic_block bb,
2407		      enum br_predictor pred,
2408		      enum prediction taken,
2409		      bitmap visited)
2410{
2411  edge e;
2412  edge_iterator ei;
2413  basic_block son;
2414
  /* We are looking for all edges forming the edge cut induced by the
     set of all blocks postdominated by BB.  */
2417  FOR_EACH_EDGE (e, ei, cur->preds)
2418    if (e->src->index >= NUM_FIXED_BLOCKS
2419	&& !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2420    {
2421      edge e2;
2422      edge_iterator ei2;
2423      bool found = false;
2424
      /* Ignore fake and EH edges; we predict them as not taken anyway.  */
2426      if (e->flags & (EDGE_EH | EDGE_FAKE))
2427	continue;
2428      gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2429
2430      /* See if there is an edge from e->src that is not abnormal
2431	 and does not lead to BB.  */
2432      FOR_EACH_EDGE (e2, ei2, e->src->succs)
2433	if (e2 != e
2434	    && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2435	    && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2436	  {
2437	    found = true;
2438	    break;
2439	  }
2440
      /* If there is a non-abnormal path leaving e->src, predict the edge
	 using the predictor.  Otherwise we need to look for paths
	 leading to e->src.

	 The second case may lead to an infinite loop when we are predicting
	 regions that are only reachable by abnormal edges.  We simply
	 prevent visiting a given BB twice.  */
2448      if (found)
2449        predict_edge_def (e, pred, taken);
2450      else if (bitmap_set_bit (visited, e->src->index))
2451	predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2452    }
2453  for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2454       son;
2455       son = next_dom_son (CDI_POST_DOMINATORS, son))
2456    predict_paths_for_bb (son, bb, pred, taken, visited);
2457}
2458
/* Set the branch probabilities on all paths leading to BB according to
   PRED and TAKEN.  */
2461
2462static void
2463predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2464			  enum prediction taken)
2465{
2466  bitmap visited = BITMAP_ALLOC (NULL);
2467  predict_paths_for_bb (bb, bb, pred, taken, visited);
2468  BITMAP_FREE (visited);
2469}
2470
/* Like predict_paths_leading_to but take an edge E instead of a basic block.  */
2472
2473static void
2474predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2475			       enum prediction taken)
2476{
2477  bool has_nonloop_edge = false;
2478  edge_iterator ei;
2479  edge e2;
2480
2481  basic_block bb = e->src;
2482  FOR_EACH_EDGE (e2, ei, bb->succs)
2483    if (e2->dest != e->src && e2->dest != e->dest
2484	&& !(e->flags & (EDGE_EH | EDGE_FAKE))
2485	&& !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2486      {
2487	has_nonloop_edge = true;
2488	break;
2489      }
2490  if (!has_nonloop_edge)
2491    {
2492      bitmap visited = BITMAP_ALLOC (NULL);
2493      predict_paths_for_bb (bb, bb, pred, taken, visited);
2494      BITMAP_FREE (visited);
2495    }
2496  else
2497    predict_edge_def (e, pred, taken);
2498}
2499
2500/* This is used to carry information about basic blocks.  It is
2501   attached to the AUX field of the standard CFG block.  */
2502
2503typedef struct block_info_def
2504{
2505  /* Estimated frequency of execution of basic_block.  */
2506  sreal frequency;
2507
  /* To keep a queue of basic blocks to process.  */
2509  basic_block next;
2510
2511  /* Number of predecessors we need to visit first.  */
2512  int npredecessors;
2513} *block_info;
2514
2515/* Similar information for edges.  */
2516typedef struct edge_info_def
2517{
  /* In case the edge is a loopback edge, the probability that the edge will
     be reached provided the header is.  The estimated number of iterations
     of the loop can then be computed as 1 / (1 - back_edge_prob).  */
2521  sreal back_edge_prob;
2522  /* True if the edge is a loopback edge in the natural loop.  */
2523  unsigned int back_edge:1;
2524} *edge_info;
2525
2526#define BLOCK_INFO(B)	((block_info) (B)->aux)
2527#define EDGE_INFO(E)	((edge_info) (E)->aux)
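
/* Worked example of the iteration estimate mentioned above (numbers are
   illustrative): if the back edge of a loop is reached with probability 0.9
   whenever the header is, then back_edge_prob is 0.9 and the expected number
   of iterations is 1 / (1 - 0.9) = 10, i.e. the header is expected to
   execute about ten times per entry into the loop.  */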
2528
2529/* Helper function for estimate_bb_frequencies.
2530   Propagate the frequencies in blocks marked in
2531   TOVISIT, starting in HEAD.  */
2532
2533static void
2534propagate_freq (basic_block head, bitmap tovisit)
2535{
2536  basic_block bb;
2537  basic_block last;
2538  unsigned i;
2539  edge e;
2540  basic_block nextbb;
2541  bitmap_iterator bi;
2542
  /* For each basic block we need to visit, count the number of its
     predecessors that we need to visit first.  */
2545  EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2546    {
2547      edge_iterator ei;
2548      int count = 0;
2549
2550      bb = BASIC_BLOCK_FOR_FN (cfun, i);
2551
2552      FOR_EACH_EDGE (e, ei, bb->preds)
2553	{
2554	  bool visit = bitmap_bit_p (tovisit, e->src->index);
2555
2556	  if (visit && !(e->flags & EDGE_DFS_BACK))
2557	    count++;
2558	  else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2559	    fprintf (dump_file,
2560		     "Irreducible region hit, ignoring edge to %i->%i\n",
2561		     e->src->index, bb->index);
2562	}
2563      BLOCK_INFO (bb)->npredecessors = count;
      /* When the function never returns, we will never process the exit
	 block.  */
2565      if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
2566	bb->count = bb->frequency = 0;
2567    }
2568
2569  memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
2570  last = head;
2571  for (bb = head; bb; bb = nextbb)
2572    {
2573      edge_iterator ei;
2574      sreal cyclic_probability, frequency;
2575
2576      memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
2577      memcpy (&frequency, &real_zero, sizeof (real_zero));
2578
2579      nextbb = BLOCK_INFO (bb)->next;
2580      BLOCK_INFO (bb)->next = NULL;
2581
2582      /* Compute frequency of basic block.  */
2583      if (bb != head)
2584	{
2585#ifdef ENABLE_CHECKING
2586	  FOR_EACH_EDGE (e, ei, bb->preds)
2587	    gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2588			|| (e->flags & EDGE_DFS_BACK));
2589#endif
2590
2591	  FOR_EACH_EDGE (e, ei, bb->preds)
2592	    if (EDGE_INFO (e)->back_edge)
2593	      {
2594		sreal_add (&cyclic_probability, &cyclic_probability,
2595			   &EDGE_INFO (e)->back_edge_prob);
2596	      }
2597	    else if (!(e->flags & EDGE_DFS_BACK))
2598	      {
2599		sreal tmp;
2600
2601		/*  frequency += (e->probability
2602				  * BLOCK_INFO (e->src)->frequency /
2603				  REG_BR_PROB_BASE);  */
2604
2605		sreal_init (&tmp, e->probability, 0);
2606		sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
2607		sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
2608		sreal_add (&frequency, &frequency, &tmp);
2609	      }
2610
2611	  if (sreal_compare (&cyclic_probability, &real_zero) == 0)
2612	    {
2613	      memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
2614		      sizeof (frequency));
2615	    }
2616	  else
2617	    {
2618	      if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
2619		{
2620		  memcpy (&cyclic_probability, &real_almost_one,
2621			  sizeof (real_almost_one));
2622		}
2623
2624	      /* BLOCK_INFO (bb)->frequency = frequency
2625					      / (1 - cyclic_probability) */
2626
2627	      sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
2628	      sreal_div (&BLOCK_INFO (bb)->frequency,
2629			 &frequency, &cyclic_probability);
2630	    }
2631	}
2632
2633      bitmap_clear_bit (tovisit, bb->index);
2634
2635      e = find_edge (bb, head);
2636      if (e)
2637	{
2638	  sreal tmp;
2639
2640	  /* EDGE_INFO (e)->back_edge_prob
2641	     = ((e->probability * BLOCK_INFO (bb)->frequency)
2642	     / REG_BR_PROB_BASE); */
2643
2644	  sreal_init (&tmp, e->probability, 0);
2645	  sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
2646	  sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2647		     &tmp, &real_inv_br_prob_base);
2648	}
2649
2650      /* Propagate to successor blocks.  */
2651      FOR_EACH_EDGE (e, ei, bb->succs)
2652	if (!(e->flags & EDGE_DFS_BACK)
2653	    && BLOCK_INFO (e->dest)->npredecessors)
2654	  {
2655	    BLOCK_INFO (e->dest)->npredecessors--;
2656	    if (!BLOCK_INFO (e->dest)->npredecessors)
2657	      {
2658		if (!nextbb)
2659		  nextbb = e->dest;
2660		else
2661		  BLOCK_INFO (last)->next = e->dest;
2662
2663		last = e->dest;
2664	      }
2665	  }
2666    }
2667}
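
/* Illustrative arithmetic for the propagation above (numbers are made up):
   a block with two non-back-edge predecessors of frequencies 1.0 and 0.5,
   reached with probabilities 0.6 and 0.8 respectively, gets
   frequency = 1.0 * 0.6 + 0.5 * 0.8 = 1.0; if the back edges into it carry
   cyclic_probability = 0.5, that value is further divided by (1 - 0.5),
   giving 2.0.  In the code above, e->probability is stored scaled by
   REG_BR_PROB_BASE, hence the extra multiplication by
   real_inv_br_prob_base.  */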
2668
/* Estimate frequencies in loops at the same nest level.  */
2670
2671static void
2672estimate_loops_at_level (struct loop *first_loop)
2673{
2674  struct loop *loop;
2675
2676  for (loop = first_loop; loop; loop = loop->next)
2677    {
2678      edge e;
2679      basic_block *bbs;
2680      unsigned i;
2681      bitmap tovisit = BITMAP_ALLOC (NULL);
2682
2683      estimate_loops_at_level (loop->inner);
2684
2685      /* Find current loop back edge and mark it.  */
2686      e = loop_latch_edge (loop);
2687      EDGE_INFO (e)->back_edge = 1;
2688
2689      bbs = get_loop_body (loop);
2690      for (i = 0; i < loop->num_nodes; i++)
2691	bitmap_set_bit (tovisit, bbs[i]->index);
2692      free (bbs);
2693      propagate_freq (loop->header, tovisit);
2694      BITMAP_FREE (tovisit);
2695    }
2696}
2697
/* Propagate frequencies through the structure of loops.  */
2699
2700static void
2701estimate_loops (void)
2702{
2703  bitmap tovisit = BITMAP_ALLOC (NULL);
2704  basic_block bb;
2705
2706  /* Start by estimating the frequencies in the loops.  */
2707  if (number_of_loops (cfun) > 1)
2708    estimate_loops_at_level (current_loops->tree_root->inner);
2709
2710  /* Now propagate the frequencies through all the blocks.  */
2711  FOR_ALL_BB_FN (bb, cfun)
2712    {
2713      bitmap_set_bit (tovisit, bb->index);
2714    }
2715  propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
2716  BITMAP_FREE (tovisit);
2717}
2718
2719/* Drop the profile for NODE to guessed, and update its frequency based on
2720   whether it is expected to be hot given the CALL_COUNT.  */
2721
2722static void
2723drop_profile (struct cgraph_node *node, gcov_type call_count)
2724{
2725  struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2726  /* In the case where this was called by another function with a
2727     dropped profile, call_count will be 0. Since there are no
2728     non-zero call counts to this function, we don't know for sure
2729     whether it is hot, and therefore it will be marked normal below.  */
2730  bool hot = maybe_hot_count_p (NULL, call_count);
2731
2732  if (dump_file)
2733    fprintf (dump_file,
2734             "Dropping 0 profile for %s/%i. %s based on calls.\n",
2735             node->name (), node->order,
2736             hot ? "Function is hot" : "Function is normal");
2737  /* We only expect to miss profiles for functions that are reached
2738     via non-zero call edges in cases where the function may have
2739     been linked from another module or library (COMDATs and extern
2740     templates). See the comments below for handle_missing_profiles.
2741     Also, only warn in cases where the missing counts exceed the
2742     number of training runs. In certain cases with an execv followed
2743     by a no-return call the profile for the no-return call is not
2744     dumped and there can be a mismatch.  */
2745  if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
2746      && call_count > profile_info->runs)
2747    {
2748      if (flag_profile_correction)
2749        {
2750          if (dump_file)
2751            fprintf (dump_file,
2752                     "Missing counts for called function %s/%i\n",
2753                     node->name (), node->order);
2754        }
2755      else
2756        warning (0, "Missing counts for called function %s/%i",
2757                 node->name (), node->order);
2758    }
2759
2760  profile_status_for_fn (fn)
2761      = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
2762  node->frequency
2763      = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
2764}
2765
2766/* In the case of COMDAT routines, multiple object files will contain the same
2767   function and the linker will select one for the binary. In that case
   all the other copies from the profile-instrumented binary will be missing
2769   profile counts. Look for cases where this happened, due to non-zero
2770   call counts going to 0-count functions, and drop the profile to guessed
2771   so that we can use the estimated probabilities and avoid optimizing only
2772   for size.
2773
2774   The other case where the profile may be missing is when the routine
2775   is not going to be emitted to the object file, e.g. for "extern template"
2776   class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2777   all other cases of non-zero calls to 0-count functions.  */
2778
2779void
2780handle_missing_profiles (void)
2781{
2782  struct cgraph_node *node;
2783  int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2784  vec<struct cgraph_node *> worklist;
2785  worklist.create (64);
2786
  /* See if a 0-count function has non-0-count callers.  In this case we
     lost some profile.  Drop its function profile to PROFILE_GUESSED.  */
2789  FOR_EACH_DEFINED_FUNCTION (node)
2790    {
2791      struct cgraph_edge *e;
2792      gcov_type call_count = 0;
2793      gcov_type max_tp_first_run = 0;
2794      struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2795
2796      if (node->count)
2797        continue;
2798      for (e = node->callers; e; e = e->next_caller)
2799      {
2800        call_count += e->count;
2801
2802	if (e->caller->tp_first_run > max_tp_first_run)
2803	  max_tp_first_run = e->caller->tp_first_run;
2804      }
2805
      /* If the time profile is missing, assign the maximum that comes from
	 the caller functions.  */
2808      if (!node->tp_first_run && max_tp_first_run)
2809	node->tp_first_run = max_tp_first_run + 1;
2810
2811      if (call_count
2812          && fn && fn->cfg
2813          && (call_count * unlikely_count_fraction >= profile_info->runs))
2814        {
2815          drop_profile (node, call_count);
2816          worklist.safe_push (node);
2817        }
2818    }
2819
2820  /* Propagate the profile dropping to other 0-count COMDATs that are
2821     potentially called by COMDATs we already dropped the profile on.  */
2822  while (worklist.length () > 0)
2823    {
2824      struct cgraph_edge *e;
2825
2826      node = worklist.pop ();
      for (e = node->callees; e; e = e->next_callee)
2828        {
2829          struct cgraph_node *callee = e->callee;
2830          struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
2831
2832          if (callee->count > 0)
2833            continue;
2834          if (DECL_COMDAT (callee->decl) && fn && fn->cfg
2835              && profile_status_for_fn (fn) == PROFILE_READ)
2836            {
              drop_profile (callee, 0);
2838              worklist.safe_push (callee);
2839            }
2840        }
2841    }
2842  worklist.release ();
2843}
2844
2845/* Convert counts measured by profile driven feedback to frequencies.
2846   Return nonzero iff there was any nonzero execution count.  */
2847
2848int
2849counts_to_freqs (void)
2850{
2851  gcov_type count_max, true_count_max = 0;
2852  basic_block bb;
2853
  /* Don't overwrite the estimated frequencies when the profile for
     the function is missing.  We may drop this function to PROFILE_GUESSED
     later in drop_profile ().  */
2857  if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
2858    return 0;
2859
2860  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2861    true_count_max = MAX (bb->count, true_count_max);
2862
2863  count_max = MAX (true_count_max, 1);
2864  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2865    bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
2866
2867  return true_count_max;
2868}
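
/* For example (illustrative numbers): with true_count_max == 1000, a basic
   block whose count is 250 gets
   frequency = (250 * BB_FREQ_MAX + 500) / 1000, i.e. roughly a quarter of
   BB_FREQ_MAX, with the "+ count_max / 2" term providing rounding to the
   nearest integer rather than truncation.  */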
2869
/* Return true if the function is likely to be expensive, so there is no
   point in optimizing performance of the prologue, epilogue or doing
   inlining at the expense of code size growth.  THRESHOLD is the limit of
   the number of instructions the function can execute on average to still
   be considered not expensive.  */
2874
2875bool
2876expensive_function_p (int threshold)
2877{
2878  unsigned int sum = 0;
2879  basic_block bb;
2880  unsigned int limit;
2881
  /* We cannot compute this accurately for large thresholds due to scaled
     frequencies.  */
2884  gcc_assert (threshold <= BB_FREQ_MAX);
2885
  /* Frequencies are out of range.  This either means that the function
     contains an internal loop executing more than BB_FREQ_MAX times or
     profile feedback is available and the function has not been executed
     at all.  */
2889  if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
2890    return true;
2891
2892  /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
2893  limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
2894  FOR_EACH_BB_FN (bb, cfun)
2895    {
2896      rtx insn;
2897
2898      FOR_BB_INSNS (bb, insn)
2899	if (active_insn_p (insn))
2900	  {
2901	    sum += bb->frequency;
2902	    if (sum > limit)
2903	      return true;
	  }
2905    }
2906
2907  return false;
2908}
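
/* Illustrative arithmetic (numbers are made up): if the entry block has
   frequency 100 and THRESHOLD is 50, then limit is 5000.  Each active insn
   contributes its block's frequency to sum, so crossing the limit means the
   function executes more than 50 instructions per invocation on average and
   is therefore reported as expensive.  */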
2909
2910/* Estimate and propagate basic block frequencies using the given branch
2911   probabilities.  If FORCE is true, the frequencies are used to estimate
2912   the counts even when there are already non-zero profile counts.  */
2913
2914void
2915estimate_bb_frequencies (bool force)
2916{
2917  basic_block bb;
2918  sreal freq_max;
2919
2920  if (force || profile_status_for_fn (cfun) != PROFILE_READ || !counts_to_freqs ())
2921    {
2922      static int real_values_initialized = 0;
2923
2924      if (!real_values_initialized)
2925        {
2926	  real_values_initialized = 1;
2927	  sreal_init (&real_zero, 0, 0);
2928	  sreal_init (&real_one, 1, 0);
2929	  sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
2930	  sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
2931	  sreal_init (&real_one_half, 1, -1);
2932	  sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
2933	  sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
2934	}
2935
2936      mark_dfs_back_edges ();
2937
2938      single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
2939	 REG_BR_PROB_BASE;
2940
2941      /* Set up block info for each basic block.  */
2942      alloc_aux_for_blocks (sizeof (struct block_info_def));
2943      alloc_aux_for_edges (sizeof (struct edge_info_def));
2944      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2945	{
2946	  edge e;
2947	  edge_iterator ei;
2948
2949	  FOR_EACH_EDGE (e, ei, bb->succs)
2950	    {
2951	      sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
2952	      sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2953			 &EDGE_INFO (e)->back_edge_prob,
2954			 &real_inv_br_prob_base);
2955	    }
2956	}
2957
2958      /* First compute frequencies locally for each loop from innermost
2959         to outermost to examine frequencies for back edges.  */
2960      estimate_loops ();
2961
2962      memcpy (&freq_max, &real_zero, sizeof (real_zero));
2963      FOR_EACH_BB_FN (bb, cfun)
2964	if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
2965	  memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));
2966
2967      sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
2968      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2969	{
2970	  sreal tmp;
2971
2972	  sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
2973	  sreal_add (&tmp, &tmp, &real_one_half);
2974	  bb->frequency = sreal_to_int (&tmp);
2975	}
2976
2977      free_aux_for_blocks ();
2978      free_aux_for_edges ();
2979    }
2980  compute_function_frequency ();
2981}
2982
2983/* Decide whether function is hot, cold or unlikely executed.  */
2984void
2985compute_function_frequency (void)
2986{
2987  basic_block bb;
2988  struct cgraph_node *node = cgraph_node::get (current_function_decl);
2989
2990  if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2991      || MAIN_NAME_P (DECL_NAME (current_function_decl)))
2992    node->only_called_at_startup = true;
2993  if (DECL_STATIC_DESTRUCTOR (current_function_decl))
2994    node->only_called_at_exit = true;
2995
2996  if (profile_status_for_fn (cfun) != PROFILE_READ)
2997    {
2998      int flags = flags_from_decl_or_type (current_function_decl);
2999      if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
3000	  != NULL)
3001        node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3002      else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
3003	       != NULL)
3004        node->frequency = NODE_FREQUENCY_HOT;
3005      else if (flags & ECF_NORETURN)
3006        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3007      else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
3008        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3009      else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
3010	       || DECL_STATIC_DESTRUCTOR (current_function_decl))
3011        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3012      return;
3013    }
3014
  /* Only the first time do we try to drop the function into the unlikely
     executed category.  After inlining, the roundoff errors may confuse us.
     The ipa-profile pass will drop functions only called from unlikely
     functions to unlikely, and that is most of what we care about.  */
3019  if (!cfun->after_inlining)
3020    node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3021  FOR_EACH_BB_FN (bb, cfun)
3022    {
3023      if (maybe_hot_bb_p (cfun, bb))
3024	{
3025	  node->frequency = NODE_FREQUENCY_HOT;
3026	  return;
3027	}
3028      if (!probably_never_executed_bb_p (cfun, bb))
3029	node->frequency = NODE_FREQUENCY_NORMAL;
3030    }
3031}
3032
3033/* Build PREDICT_EXPR.  */
3034tree
3035build_predict_expr (enum br_predictor predictor, enum prediction taken)
3036{
3037  tree t = build1 (PREDICT_EXPR, void_type_node,
3038		   build_int_cst (integer_type_node, predictor));
3039  SET_PREDICT_EXPR_OUTCOME (t, taken);
3040  return t;
3041}
3042
3043const char *
3044predictor_name (enum br_predictor predictor)
3045{
3046  return predictor_info[predictor].name;
3047}
3048
3049/* Predict branch probabilities and estimate profile of the tree CFG. */
3050
3051namespace {
3052
3053const pass_data pass_data_profile =
3054{
3055  GIMPLE_PASS, /* type */
3056  "profile_estimate", /* name */
3057  OPTGROUP_NONE, /* optinfo_flags */
3058  TV_BRANCH_PROB, /* tv_id */
3059  PROP_cfg, /* properties_required */
3060  0, /* properties_provided */
3061  0, /* properties_destroyed */
3062  0, /* todo_flags_start */
3063  0, /* todo_flags_finish */
3064};
3065
3066class pass_profile : public gimple_opt_pass
3067{
3068public:
3069  pass_profile (gcc::context *ctxt)
3070    : gimple_opt_pass (pass_data_profile, ctxt)
3071  {}
3072
3073  /* opt_pass methods: */
3074  virtual bool gate (function *) { return flag_guess_branch_prob; }
3075  virtual unsigned int execute (function *);
3076
3077}; // class pass_profile
3078
3079unsigned int
3080pass_profile::execute (function *fun)
3081{
3082  unsigned nb_loops;
3083
3084  loop_optimizer_init (LOOPS_NORMAL);
3085  if (dump_file && (dump_flags & TDF_DETAILS))
3086    flow_loops_dump (dump_file, NULL, 0);
3087
3088  mark_irreducible_loops ();
3089
3090  nb_loops = number_of_loops (fun);
3091  if (nb_loops > 1)
3092    scev_initialize ();
3093
3094  tree_estimate_probability ();
3095
3096  if (nb_loops > 1)
3097    scev_finalize ();
3098
3099  loop_optimizer_finalize ();
3100  if (dump_file && (dump_flags & TDF_DETAILS))
3101    gimple_dump_cfg (dump_file, dump_flags);
  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
3103    profile_status_for_fn (fun) = PROFILE_GUESSED;
3104  return 0;
3105}
3106
3107} // anon namespace
3108
3109gimple_opt_pass *
3110make_pass_profile (gcc::context *ctxt)
3111{
3112  return new pass_profile (ctxt);
3113}
3114
3115namespace {
3116
3117const pass_data pass_data_strip_predict_hints =
3118{
3119  GIMPLE_PASS, /* type */
3120  "*strip_predict_hints", /* name */
3121  OPTGROUP_NONE, /* optinfo_flags */
3122  TV_BRANCH_PROB, /* tv_id */
3123  PROP_cfg, /* properties_required */
3124  0, /* properties_provided */
3125  0, /* properties_destroyed */
3126  0, /* todo_flags_start */
3127  0, /* todo_flags_finish */
3128};
3129
3130class pass_strip_predict_hints : public gimple_opt_pass
3131{
3132public:
3133  pass_strip_predict_hints (gcc::context *ctxt)
3134    : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3135  {}
3136
3137  /* opt_pass methods: */
3138  opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3139  virtual unsigned int execute (function *);
3140
3141}; // class pass_strip_predict_hints
3142
3143/* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
3144   we no longer need.  */
3145unsigned int
3146pass_strip_predict_hints::execute (function *fun)
3147{
3148  basic_block bb;
3149  gimple ass_stmt;
3150  tree var;
3151
3152  FOR_EACH_BB_FN (bb, fun)
3153    {
3154      gimple_stmt_iterator bi;
3155      for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
3156	{
3157	  gimple stmt = gsi_stmt (bi);
3158
3159	  if (gimple_code (stmt) == GIMPLE_PREDICT)
3160	    {
3161	      gsi_remove (&bi, true);
3162	      continue;
3163	    }
3164	  else if (is_gimple_call (stmt))
3165	    {
3166	      tree fndecl = gimple_call_fndecl (stmt);
3167
3168	      if ((fndecl
3169		   && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3170		   && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
3171		   && gimple_call_num_args (stmt) == 2)
3172		  || (gimple_call_internal_p (stmt)
3173		      && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
3174		{
3175		  var = gimple_call_lhs (stmt);
3176		  if (var)
3177		    {
3178		      ass_stmt
3179			= gimple_build_assign (var, gimple_call_arg (stmt, 0));
3180		      gsi_replace (&bi, ass_stmt, true);
3181		    }
3182		  else
3183		    {
3184		      gsi_remove (&bi, true);
3185		      continue;
3186		    }
3187		}
3188	    }
3189	  gsi_next (&bi);
3190	}
3191    }
3192  return 0;
3193}
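
/* In effect (purely illustrative), a statement such as

     c_2 = __builtin_expect (c_1, 1);

   is rewritten by the pass above into the plain copy

     c_2 = c_1;

   while GIMPLE_PREDICT statements are deleted outright, since the
   profile_estimate pass has already consumed the hints.  */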
3194
3195} // anon namespace
3196
3197gimple_opt_pass *
3198make_pass_strip_predict_hints (gcc::context *ctxt)
3199{
3200  return new pass_strip_predict_hints (ctxt);
3201}
3202
/* Rebuild function frequencies.  Passes are in general expected to
   maintain the profile by hand, however in some cases this is not possible:
   for example when inlining several functions with loops, frequencies might
   run out of scale and thus need to be recomputed.  */
3207
3208void
3209rebuild_frequencies (void)
3210{
3211  timevar_push (TV_REBUILD_FREQUENCIES);
3212
3213  /* When the max bb count in the function is small, there is a higher
3214     chance that there were truncation errors in the integer scaling
3215     of counts by inlining and other optimizations. This could lead
3216     to incorrect classification of code as being cold when it isn't.
3217     In that case, force the estimation of bb counts/frequencies from the
3218     branch probabilities, rather than computing frequencies from counts,
3219     which may also lead to frequencies incorrectly reduced to 0. There
3220     is less precision in the probabilities, so we only do this for small
3221     max counts.  */
3222  gcov_type count_max = 0;
3223  basic_block bb;
3224  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3225    count_max = MAX (bb->count, count_max);
3226
3227  if (profile_status_for_fn (cfun) == PROFILE_GUESSED
3228      || (profile_status_for_fn (cfun) == PROFILE_READ && count_max < REG_BR_PROB_BASE/10))
3229    {
3230      loop_optimizer_init (0);
3231      add_noreturn_fake_exit_edges ();
3232      mark_irreducible_loops ();
3233      connect_infinite_loops_to_exit ();
3234      estimate_bb_frequencies (true);
3235      remove_fake_exit_edges ();
3236      loop_optimizer_finalize ();
3237    }
3238  else if (profile_status_for_fn (cfun) == PROFILE_READ)
3239    counts_to_freqs ();
3240  else
3241    gcc_unreachable ();
3242  timevar_pop (TV_REBUILD_FREQUENCIES);
3243}
3244