1/* Branch prediction routines for the GNU compiler.
2   Copyright (C) 2000-2014 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8Software Foundation; either version 3, or (at your option) any later
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3.  If not see
18<http://www.gnu.org/licenses/>.  */
19
20/* References:
21
22   [1] "Branch Prediction for Free"
23       Ball and Larus; PLDI '93.
24   [2] "Static Branch Frequency and Program Profile Analysis"
25       Wu and Larus; MICRO-27.
26   [3] "Corpus-based Static Branch Prediction"
27       Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
28
29
30#include "config.h"
31#include "system.h"
32#include "coretypes.h"
33#include "tm.h"
34#include "tree.h"
35#include "calls.h"
36#include "rtl.h"
37#include "tm_p.h"
38#include "hard-reg-set.h"
39#include "basic-block.h"
40#include "insn-config.h"
41#include "regs.h"
42#include "flags.h"
43#include "function.h"
44#include "except.h"
45#include "diagnostic-core.h"
46#include "recog.h"
47#include "expr.h"
48#include "predict.h"
49#include "coverage.h"
50#include "sreal.h"
51#include "params.h"
52#include "target.h"
53#include "cfgloop.h"
54#include "pointer-set.h"
55#include "tree-ssa-alias.h"
56#include "internal-fn.h"
57#include "gimple-expr.h"
58#include "is-a.h"
59#include "gimple.h"
60#include "gimple-iterator.h"
61#include "gimple-ssa.h"
62#include "cgraph.h"
63#include "tree-cfg.h"
64#include "tree-phinodes.h"
65#include "ssa-iterators.h"
66#include "tree-ssa-loop-niter.h"
67#include "tree-ssa-loop.h"
68#include "tree-pass.h"
69#include "tree-scalar-evolution.h"
70#include "cfgloop.h"
71
72/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
73		   1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
74static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
75	     real_inv_br_prob_base, real_one_half, real_bb_freq_max;
76
77static void combine_predictions_for_insn (rtx, basic_block);
78static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
79static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
80static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
81static bool can_predict_insn_p (const_rtx);
82
83/* Information we hold about each branch predictor.
84   Filled using information from predict.def.  */
85
86struct predictor_info
87{
88  const char *const name;	/* Name used in the debugging dumps.  */
89  const int hitrate;		/* Expected hitrate used by
90				   predict_insn_def call.  */
91  const int flags;
92};
93
/* Use given predictor without Dempster-Shafer theory if it matches
   using first_match heuristics.  */
96#define PRED_FLAG_FIRST_MATCH 1
97
/* Convert a hitrate given in percent to our fixed-point representation.  */
99
100#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
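
/* For instance, with REG_BR_PROB_BASE of 10000, HITRATE (90) evaluates to
   (90 * 10000 + 50) / 100 == 9000, i.e. a 90% hitrate expressed in the
   fixed-point probability scale used throughout this file.  */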
101
102#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
103static const struct predictor_info predictor_info[]= {
104#include "predict.def"
105
106  /* Upper bound on predictors.  */
107  {NULL, 0, 0}
108};
109#undef DEF_PREDICTOR
110
111/* Return TRUE if frequency FREQ is considered to be hot.  */
112
113static inline bool
114maybe_hot_frequency_p (struct function *fun, int freq)
115{
116  struct cgraph_node *node = cgraph_get_node (fun->decl);
117  if (!profile_info || !flag_branch_probabilities)
118    {
119      if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
120        return false;
121      if (node->frequency == NODE_FREQUENCY_HOT)
122        return true;
123    }
124  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
125    return true;
126  if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
127      && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
128    return false;
129  if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
130    return false;
131  if (freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency
132	      / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
133    return false;
134  return true;
135}
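
/* For instance, assuming the default HOT_BB_FREQUENCY_FRACTION of 1000 and
   an entry block frequency of BB_FREQ_MAX (10000), a block is considered hot
   by the function above only if its frequency is at least 10; in functions
   known to be executed once, anything below two thirds of the entry
   frequency is rejected first.  The parameter value is an assumption here;
   see params.def for the actual default.  */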
136
137static gcov_type min_count = -1;
138
139/* Determine the threshold for hot BB counts.  */
140
141gcov_type
142get_hot_bb_threshold ()
143{
144  gcov_working_set_t *ws;
145  if (min_count == -1)
146    {
147      ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
148      gcc_assert (ws);
149      min_count = ws->min_counter;
150    }
151  return min_count;
152}
153
154/* Set the threshold for hot BB counts.  */
155
156void
157set_hot_bb_threshold (gcov_type min)
158{
159  min_count = min;
160}
161
/* Return TRUE if profile count COUNT is considered to be hot in function FUN.  */
163
164static inline bool
165maybe_hot_count_p (struct function *fun, gcov_type count)
166{
167  if (fun && profile_status_for_fn (fun) != PROFILE_READ)
168    return true;
169  /* Code executed at most once is not hot.  */
170  if (profile_info->runs >= count)
171    return false;
172  return (count >= get_hot_bb_threshold ());
173}
174
175/* Return true in case BB can be CPU intensive and should be optimized
176   for maximal performance.  */
177
178bool
179maybe_hot_bb_p (struct function *fun, const_basic_block bb)
180{
181  gcc_checking_assert (fun);
182  if (profile_status_for_fn (fun) == PROFILE_READ)
183    return maybe_hot_count_p (fun, bb->count);
184  return maybe_hot_frequency_p (fun, bb->frequency);
185}
186
187/* Return true if the call can be hot.  */
188
189bool
190cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
191{
192  if (profile_info && flag_branch_probabilities
193      && !maybe_hot_count_p (NULL,
194                             edge->count))
195    return false;
196  if (edge->caller->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED
197      || (edge->callee
198	  && edge->callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
199    return false;
200  if (edge->caller->frequency > NODE_FREQUENCY_UNLIKELY_EXECUTED
201      && (edge->callee
202	  && edge->callee->frequency <= NODE_FREQUENCY_EXECUTED_ONCE))
203    return false;
204  if (optimize_size)
205    return false;
206  if (edge->caller->frequency == NODE_FREQUENCY_HOT)
207    return true;
208  if (edge->caller->frequency == NODE_FREQUENCY_EXECUTED_ONCE
209      && edge->frequency < CGRAPH_FREQ_BASE * 3 / 2)
210    return false;
211  if (flag_guess_branch_prob)
212    {
213      if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0
214	  || edge->frequency <= (CGRAPH_FREQ_BASE
215				 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
216        return false;
217    }
218  return true;
219}
220
/* Return true in case edge E can be CPU intensive and should be optimized
   for maximal performance.  */
223
224bool
225maybe_hot_edge_p (edge e)
226{
227  if (profile_status_for_fn (cfun) == PROFILE_READ)
228    return maybe_hot_count_p (cfun, e->count);
229  return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
230}
231
232
233
/* Return true if profile COUNT and FREQUENCY, or function FUN's static
   node frequency, reflect that the code is never executed.  */
236
237static bool
238probably_never_executed (struct function *fun,
239                         gcov_type count, int frequency)
240{
241  gcc_checking_assert (fun);
242  if (profile_status_for_fn (cfun) == PROFILE_READ)
243    {
244      int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
245      if (count * unlikely_count_fraction >= profile_info->runs)
246	return false;
247      if (!frequency)
248	return true;
249      if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency)
250	return false;
251      if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
252	{
253          gcov_type computed_count;
254          /* Check for possibility of overflow, in which case entry bb count
255             is large enough to do the division first without losing much
256             precision.  */
257	  if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count < REG_BR_PROB_BASE *
258	      REG_BR_PROB_BASE)
259            {
260              gcov_type scaled_count
261		  = frequency * ENTRY_BLOCK_PTR_FOR_FN (cfun)->count *
262	     unlikely_count_fraction;
263	      computed_count = RDIV (scaled_count,
264				     ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency);
265            }
266          else
267            {
268	      computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count,
269				     ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency);
270              computed_count *= frequency * unlikely_count_fraction;
271            }
272          if (computed_count >= profile_info->runs)
273            return false;
274	}
275      return true;
276    }
277  if ((!profile_info || !flag_branch_probabilities)
278      && (cgraph_get_node (fun->decl)->frequency
279	  == NODE_FREQUENCY_UNLIKELY_EXECUTED))
280    return true;
281  return false;
282}
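
/* A sketch of the arithmetic above with made-up numbers (assuming an
   UNLIKELY_BB_COUNT_FRACTION of 20): for a block with count 3 and frequency 4
   in a function with entry count 1000, entry frequency 10000 and 100 training
   runs, 3 * 20 < 100, the scaled count is 4 * 1000 * 20 / 10000 == 8, and
   8 < 100, so the block is reported as probably never executed.  The
   parameter value is an assumption; see params.def for the actual default.  */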
283
284
285/* Return true in case BB is probably never executed.  */
286
287bool
288probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
289{
290  return probably_never_executed (fun, bb->count, bb->frequency);
291}
292
293
294/* Return true in case edge E is probably never executed.  */
295
296bool
297probably_never_executed_edge_p (struct function *fun, edge e)
298{
299  return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
300}
301
302/* Return true if NODE should be optimized for size.  */
303
304bool
305cgraph_optimize_for_size_p (struct cgraph_node *node)
306{
307  if (optimize_size)
308    return true;
309  if (node && (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
310    return true;
311  else
312    return false;
313}
314
/* Return true when function FUN should always be optimized for size.  */
316
317bool
318optimize_function_for_size_p (struct function *fun)
319{
320  if (optimize_size)
321    return true;
322  if (!fun || !fun->decl)
323    return false;
324  return cgraph_optimize_for_size_p (cgraph_get_node (fun->decl));
325}
326
/* Return true when function FUN should always be optimized for speed.  */
328
329bool
330optimize_function_for_speed_p (struct function *fun)
331{
332  return !optimize_function_for_size_p (fun);
333}
334
335/* Return TRUE when BB should be optimized for size.  */
336
337bool
338optimize_bb_for_size_p (const_basic_block bb)
339{
340  return optimize_function_for_size_p (cfun) || !maybe_hot_bb_p (cfun, bb);
341}
342
343/* Return TRUE when BB should be optimized for speed.  */
344
345bool
346optimize_bb_for_speed_p (const_basic_block bb)
347{
348  return !optimize_bb_for_size_p (bb);
349}
350
/* Return TRUE when edge E should be optimized for size.  */
352
353bool
354optimize_edge_for_size_p (edge e)
355{
356  return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
357}
358
/* Return TRUE when edge E should be optimized for speed.  */
360
361bool
362optimize_edge_for_speed_p (edge e)
363{
364  return !optimize_edge_for_size_p (e);
365}
366
/* Return TRUE when the current instruction should be optimized for size.  */
368
369bool
370optimize_insn_for_size_p (void)
371{
372  return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
373}
374
/* Return TRUE when the current instruction should be optimized for speed.  */
376
377bool
378optimize_insn_for_speed_p (void)
379{
380  return !optimize_insn_for_size_p ();
381}
382
383/* Return TRUE when LOOP should be optimized for size.  */
384
385bool
386optimize_loop_for_size_p (struct loop *loop)
387{
388  return optimize_bb_for_size_p (loop->header);
389}
390
391/* Return TRUE when LOOP should be optimized for speed.  */
392
393bool
394optimize_loop_for_speed_p (struct loop *loop)
395{
396  return optimize_bb_for_speed_p (loop->header);
397}
398
399/* Return TRUE when LOOP nest should be optimized for speed.  */
400
401bool
402optimize_loop_nest_for_speed_p (struct loop *loop)
403{
404  struct loop *l = loop;
405  if (optimize_loop_for_speed_p (loop))
406    return true;
407  l = loop->inner;
408  while (l && l != loop)
409    {
410      if (optimize_loop_for_speed_p (l))
411        return true;
412      if (l->inner)
413        l = l->inner;
414      else if (l->next)
415        l = l->next;
416      else
417        {
418	  while (l != loop && !l->next)
419	    l = loop_outer (l);
420	  if (l != loop)
421	    l = l->next;
422	}
423    }
424  return false;
425}
426
427/* Return TRUE when LOOP nest should be optimized for size.  */
428
429bool
430optimize_loop_nest_for_size_p (struct loop *loop)
431{
432  return !optimize_loop_nest_for_speed_p (loop);
433}
434
435/* Return true when edge E is likely to be well predictable by branch
436   predictor.  */
437
438bool
439predictable_edge_p (edge e)
440{
441  if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
442    return false;
443  if ((e->probability
444       <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
445      || (REG_BR_PROB_BASE - e->probability
446          <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
447    return true;
448  return false;
449}
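
/* For instance, assuming the default PREDICTABLE_BRANCH_OUTCOME value of 2,
   an edge is considered well predictable by the function above when its
   probability is at most 2 * REG_BR_PROB_BASE / 100 == 200 or at least
   9800 out of 10000.  The parameter value is an assumption here; see
   params.def for the actual default.  */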
450
451
452/* Set RTL expansion for BB profile.  */
453
454void
455rtl_profile_for_bb (basic_block bb)
456{
457  crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
458}
459
460/* Set RTL expansion for edge profile.  */
461
462void
463rtl_profile_for_edge (edge e)
464{
465  crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
466}
467
468/* Set RTL expansion to default mode (i.e. when profile info is not known).  */
469void
470default_rtl_profile (void)
471{
472  crtl->maybe_hot_insn_p = true;
473}
474
/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */
477
478bool
479rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
480{
481  rtx note;
482  if (!INSN_P (BB_END (bb)))
483    return false;
484  for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
485    if (REG_NOTE_KIND (note) == REG_BR_PRED
486	&& INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
487      return true;
488  return false;
489}
490
491/* This map contains for a basic block the list of predictions for the
492   outgoing edges.  */
493
494static struct pointer_map_t *bb_predictions;
495
/* Structure representing predictions at the tree level.  */
497
498struct edge_prediction {
499    struct edge_prediction *ep_next;
500    edge ep_edge;
501    enum br_predictor ep_predictor;
502    int ep_probability;
503};
504
/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */
507
508bool
509gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
510{
511  struct edge_prediction *i;
512  void **preds = pointer_map_contains (bb_predictions, bb);
513
514  if (!preds)
515    return false;
516
517  for (i = (struct edge_prediction *) *preds; i; i = i->ep_next)
518    if (i->ep_predictor == predictor)
519      return true;
520  return false;
521}
522
/* Return true when the probability of edge is reliable.

   The profile guessing code is good at predicting the branch outcome (i.e.
   taken/not taken); it is predicted correctly slightly over 75% of the time.
   It is however notoriously poor at predicting the probability itself.
   In general the guessed profile appears a lot flatter (with probabilities
   closer to 50%) than reality, so it is a bad idea to use it to drive
   optimizations such as those disabling dynamic branch prediction for well
   predictable branches.

   There are two exceptions - edges leading to noreturn calls and edges
   predicted by the number of iterations heuristics are predicted well.  This
   predicate should be able to distinguish those, but at the moment it simply
   checks for the noreturn heuristic, which is the only one giving
   probabilities over 99% or below 1%.  In the future we might want to
   propagate reliability information across the CFG if we find it useful in
   multiple places.  */
539static bool
540probability_reliable_p (int prob)
541{
542  return (profile_status_for_fn (cfun) == PROFILE_READ
543	  || (profile_status_for_fn (cfun) == PROFILE_GUESSED
544	      && (prob <= HITRATE (1) || prob >= HITRATE (99))));
545}
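
/* For instance, with REG_BR_PROB_BASE of 10000, HITRATE (1) == 100 and
   HITRATE (99) == 9900, so a guessed probability is treated as reliable by
   the predicate above only when it is at most 1% or at least 99%, while
   probabilities read from a profile are always considered reliable.  */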
546
547/* Same predicate as above, working on edges.  */
548bool
549edge_probability_reliable_p (const_edge e)
550{
551  return probability_reliable_p (e->probability);
552}
553
554/* Same predicate as edge_probability_reliable_p, working on notes.  */
555bool
556br_prob_note_reliable_p (const_rtx note)
557{
558  gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
559  return probability_reliable_p (XINT (note, 0));
560}
561
562static void
563predict_insn (rtx insn, enum br_predictor predictor, int probability)
564{
565  gcc_assert (any_condjump_p (insn));
566  if (!flag_guess_branch_prob)
567    return;
568
569  add_reg_note (insn, REG_BR_PRED,
570		gen_rtx_CONCAT (VOIDmode,
571				GEN_INT ((int) predictor),
572				GEN_INT ((int) probability)));
573}
574
575/* Predict insn by given predictor.  */
576
577void
578predict_insn_def (rtx insn, enum br_predictor predictor,
579		  enum prediction taken)
580{
581   int probability = predictor_info[(int) predictor].hitrate;
582
583   if (taken != TAKEN)
584     probability = REG_BR_PROB_BASE - probability;
585
586   predict_insn (insn, predictor, probability);
587}
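
/* For instance, a predictor with a hitrate of 9000 (90% of a
   REG_BR_PROB_BASE of 10000) makes predict_insn_def attach probability 9000
   for TAKEN and 10000 - 9000 == 1000 for NOT_TAKEN.  This is an illustration
   only; the real hitrates come from predict.def.  */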
588
589/* Predict edge E with given probability if possible.  */
590
591void
592rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
593{
594  rtx last_insn;
595  last_insn = BB_END (e->src);
596
597  /* We can store the branch prediction information only about
598     conditional jumps.  */
599  if (!any_condjump_p (last_insn))
600    return;
601
602  /* We always store probability of branching.  */
603  if (e->flags & EDGE_FALLTHRU)
604    probability = REG_BR_PROB_BASE - probability;
605
606  predict_insn (last_insn, predictor, probability);
607}
608
609/* Predict edge E with the given PROBABILITY.  */
610void
611gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
612{
613  gcc_assert (profile_status_for_fn (cfun) != PROFILE_GUESSED);
  if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
       && EDGE_COUNT (e->src->succs) > 1)
      && flag_guess_branch_prob && optimize)
617    {
618      struct edge_prediction *i = XNEW (struct edge_prediction);
619      void **preds = pointer_map_insert (bb_predictions, e->src);
620
621      i->ep_next = (struct edge_prediction *) *preds;
622      *preds = i;
623      i->ep_probability = probability;
624      i->ep_predictor = predictor;
625      i->ep_edge = e;
626    }
627}
628
629/* Remove all predictions on given basic block that are attached
630   to edge E.  */
631void
632remove_predictions_associated_with_edge (edge e)
633{
634  void **preds;
635
636  if (!bb_predictions)
637    return;
638
639  preds = pointer_map_contains (bb_predictions, e->src);
640
641  if (preds)
642    {
643      struct edge_prediction **prediction = (struct edge_prediction **) preds;
644      struct edge_prediction *next;
645
646      while (*prediction)
647	{
648	  if ((*prediction)->ep_edge == e)
649	    {
650	      next = (*prediction)->ep_next;
651	      free (*prediction);
652	      *prediction = next;
653	    }
654	  else
655	    prediction = &((*prediction)->ep_next);
656	}
657    }
658}
659
660/* Clears the list of predictions stored for BB.  */
661
662static void
663clear_bb_predictions (basic_block bb)
664{
665  void **preds = pointer_map_contains (bb_predictions, bb);
666  struct edge_prediction *pred, *next;
667
668  if (!preds)
669    return;
670
671  for (pred = (struct edge_prediction *) *preds; pred; pred = next)
672    {
673      next = pred->ep_next;
674      free (pred);
675    }
676  *preds = NULL;
677}
678
/* Return true when we can store a prediction on insn INSN.
   At the moment we represent predictions only on conditional
   jumps, not on computed jumps or other complicated cases.  */
682static bool
683can_predict_insn_p (const_rtx insn)
684{
685  return (JUMP_P (insn)
686	  && any_condjump_p (insn)
687	  && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
688}
689
690/* Predict edge E by given predictor if possible.  */
691
692void
693predict_edge_def (edge e, enum br_predictor predictor,
694		  enum prediction taken)
695{
696   int probability = predictor_info[(int) predictor].hitrate;
697
698   if (taken != TAKEN)
699     probability = REG_BR_PROB_BASE - probability;
700
701   predict_edge (e, predictor, probability);
702}
703
704/* Invert all branch predictions or probability notes in the INSN.  This needs
705   to be done each time we invert the condition used by the jump.  */
706
707void
708invert_br_probabilities (rtx insn)
709{
710  rtx note;
711
712  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
713    if (REG_NOTE_KIND (note) == REG_BR_PROB)
714      XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
715    else if (REG_NOTE_KIND (note) == REG_BR_PRED)
716      XEXP (XEXP (note, 0), 1)
717	= GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
718}
719
720/* Dump information about the branch prediction to the output file.  */
721
722static void
723dump_prediction (FILE *file, enum br_predictor predictor, int probability,
724		 basic_block bb, int used)
725{
726  edge e;
727  edge_iterator ei;
728
729  if (!file)
730    return;
731
732  FOR_EACH_EDGE (e, ei, bb->succs)
733    if (! (e->flags & EDGE_FALLTHRU))
734      break;
735
736  fprintf (file, "  %s heuristics%s: %.1f%%",
737	   predictor_info[predictor].name,
738	   used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
739
740  if (bb->count)
741    {
742      fprintf (file, "  exec %"PRId64, bb->count);
743      if (e)
744	{
745	  fprintf (file, " hit %"PRId64, e->count);
746	  fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
747	}
748    }
749
750  fprintf (file, "\n");
751}
752
/* We cannot predict the probabilities of the outgoing edges of BB.  Set them
   evenly and hope for the best.  */
755static void
756set_even_probabilities (basic_block bb)
757{
758  int nedges = 0;
759  edge e;
760  edge_iterator ei;
761
762  FOR_EACH_EDGE (e, ei, bb->succs)
763    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
764      nedges ++;
765  FOR_EACH_EDGE (e, ei, bb->succs)
766    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
767      e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
768    else
769      e->probability = 0;
770}
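
/* For instance, if BB has three successors that are not EH or fake edges,
   each of them receives (REG_BR_PROB_BASE + 1) / 3 == 3333 from the function
   above (assuming REG_BR_PROB_BASE of 10000); the numbers are illustrative
   only.  */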
771
/* Combine all REG_BR_PRED notes into a single probability and attach a
   REG_BR_PROB note if not already present.  Remove the now useless
   REG_BR_PRED notes.  */
774
775static void
776combine_predictions_for_insn (rtx insn, basic_block bb)
777{
778  rtx prob_note;
779  rtx *pnote;
780  rtx note;
781  int best_probability = PROB_EVEN;
782  enum br_predictor best_predictor = END_PREDICTORS;
783  int combined_probability = REG_BR_PROB_BASE / 2;
784  int d;
785  bool first_match = false;
786  bool found = false;
787
788  if (!can_predict_insn_p (insn))
789    {
790      set_even_probabilities (bb);
791      return;
792    }
793
794  prob_note = find_reg_note (insn, REG_BR_PROB, 0);
795  pnote = &REG_NOTES (insn);
796  if (dump_file)
797    fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
798	     bb->index);
799
800  /* We implement "first match" heuristics and use probability guessed
801     by predictor with smallest index.  */
802  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
803    if (REG_NOTE_KIND (note) == REG_BR_PRED)
804      {
805	enum br_predictor predictor = ((enum br_predictor)
806				       INTVAL (XEXP (XEXP (note, 0), 0)));
807	int probability = INTVAL (XEXP (XEXP (note, 0), 1));
808
809	found = true;
810	if (best_predictor > predictor)
811	  best_probability = probability, best_predictor = predictor;
812
813	d = (combined_probability * probability
814	     + (REG_BR_PROB_BASE - combined_probability)
815	     * (REG_BR_PROB_BASE - probability));
816
	/* Use FP math to avoid overflows of 32-bit integers.  */
818	if (d == 0)
819	  /* If one probability is 0% and one 100%, avoid division by zero.  */
820	  combined_probability = REG_BR_PROB_BASE / 2;
821	else
822	  combined_probability = (((double) combined_probability) * probability
823				  * REG_BR_PROB_BASE / d + 0.5);
824      }
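
  /* As an illustration of the Dempster-Shafer combination above (made-up
     numbers, not taken from predict.def): combining predictions of 7000 and
     8000 gives d = 7000 * 8000 + 3000 * 2000 == 62000000 and a combined
     probability of 7000 * 8000 * 10000 / 62000000, about 9032, i.e. two
     agreeing 70% and 80% predictions reinforce each other to roughly 90%.  */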
825
  /* Decide which heuristic to use.  If we didn't match anything,
     use the no_prediction heuristic; if we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */
829
830  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
831    first_match = true;
832
833  if (!found)
834    dump_prediction (dump_file, PRED_NO_PREDICTION,
835		     combined_probability, bb, true);
836  else
837    {
838      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
839		       bb, !first_match);
840      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
841		       bb, first_match);
842    }
843
844  if (first_match)
845    combined_probability = best_probability;
846  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
847
848  while (*pnote)
849    {
850      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
851	{
852	  enum br_predictor predictor = ((enum br_predictor)
853					 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
854	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
855
856	  dump_prediction (dump_file, predictor, probability, bb,
857			   !first_match || best_predictor == predictor);
858	  *pnote = XEXP (*pnote, 1);
859	}
860      else
861	pnote = &XEXP (*pnote, 1);
862    }
863
864  if (!prob_note)
865    {
866      add_int_reg_note (insn, REG_BR_PROB, combined_probability);
867
      /* Save the prediction into the CFG if we are looking at a
	 non-degenerate conditional jump.  */
870      if (!single_succ_p (bb))
871	{
872	  BRANCH_EDGE (bb)->probability = combined_probability;
873	  FALLTHRU_EDGE (bb)->probability
874	    = REG_BR_PROB_BASE - combined_probability;
875	}
876    }
877  else if (!single_succ_p (bb))
878    {
879      int prob = XINT (prob_note, 0);
880
881      BRANCH_EDGE (bb)->probability = prob;
882      FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
883    }
884  else
885    single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
886}
887
/* Combine predictions into a single probability and store them into the CFG.
   Remove the now useless prediction entries.  */
890
891static void
892combine_predictions_for_bb (basic_block bb)
893{
894  int best_probability = PROB_EVEN;
895  enum br_predictor best_predictor = END_PREDICTORS;
896  int combined_probability = REG_BR_PROB_BASE / 2;
897  int d;
898  bool first_match = false;
899  bool found = false;
900  struct edge_prediction *pred;
901  int nedges = 0;
902  edge e, first = NULL, second = NULL;
903  edge_iterator ei;
904  void **preds;
905
906  FOR_EACH_EDGE (e, ei, bb->succs)
907    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
908      {
909	nedges ++;
910	if (first && !second)
911	  second = e;
912	if (!first)
913	  first = e;
914      }
915
  /* When there is no successor or only one choice, prediction is easy.

     We are lazy for now and predict only basic blocks with two outgoing
     edges.  It is possible to predict the generic case too, but we would
     have to ignore the first match heuristics and do more involved
     combining.  Implement this later.  */
922  if (nedges != 2)
923    {
924      if (!bb->count)
925	set_even_probabilities (bb);
926      clear_bb_predictions (bb);
927      if (dump_file)
928	fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
929		 nedges, bb->index);
930      return;
931    }
932
933  if (dump_file)
934    fprintf (dump_file, "Predictions for bb %i\n", bb->index);
935
936  preds = pointer_map_contains (bb_predictions, bb);
937  if (preds)
938    {
939      /* We implement "first match" heuristics and use probability guessed
940	 by predictor with smallest index.  */
941      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
942	{
943	  enum br_predictor predictor = pred->ep_predictor;
944	  int probability = pred->ep_probability;
945
946	  if (pred->ep_edge != first)
947	    probability = REG_BR_PROB_BASE - probability;
948
949	  found = true;
	  /* The first match heuristics would be wildly confused if we
	     predicted both directions.  */
952	  if (best_predictor > predictor)
953	    {
954              struct edge_prediction *pred2;
955	      int prob = probability;
956
957	      for (pred2 = (struct edge_prediction *) *preds;
958		   pred2; pred2 = pred2->ep_next)
959	       if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
960	         {
	           int probability2 = pred2->ep_probability;
962
963		   if (pred2->ep_edge != first)
964		     probability2 = REG_BR_PROB_BASE - probability2;
965
966		   if ((probability < REG_BR_PROB_BASE / 2) !=
967		       (probability2 < REG_BR_PROB_BASE / 2))
968		     break;
969
		   /* If the same predictor later gave a better result, go for it! */
971		   if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
972		       || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
973		     prob = probability2;
974		 }
975	      if (!pred2)
976	        best_probability = prob, best_predictor = predictor;
977	    }
978
979	  d = (combined_probability * probability
980	       + (REG_BR_PROB_BASE - combined_probability)
981	       * (REG_BR_PROB_BASE - probability));
982
	  /* Use FP math to avoid overflows of 32-bit integers.  */
984	  if (d == 0)
985	    /* If one probability is 0% and one 100%, avoid division by zero.  */
986	    combined_probability = REG_BR_PROB_BASE / 2;
987	  else
988	    combined_probability = (((double) combined_probability)
989				    * probability
990		    		    * REG_BR_PROB_BASE / d + 0.5);
991	}
992    }
993
  /* Decide which heuristic to use.  If we didn't match anything,
     use the no_prediction heuristic; if we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */
997
998  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
999    first_match = true;
1000
1001  if (!found)
1002    dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
1003  else
1004    {
1005      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
1006		       !first_match);
1007      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
1008		       first_match);
1009    }
1010
1011  if (first_match)
1012    combined_probability = best_probability;
1013  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
1014
1015  if (preds)
1016    {
1017      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
1018	{
1019	  enum br_predictor predictor = pred->ep_predictor;
1020	  int probability = pred->ep_probability;
1021
1022	  if (pred->ep_edge != EDGE_SUCC (bb, 0))
1023	    probability = REG_BR_PROB_BASE - probability;
1024	  dump_prediction (dump_file, predictor, probability, bb,
1025			   !first_match || best_predictor == predictor);
1026	}
1027    }
1028  clear_bb_predictions (bb);
1029
1030  if (!bb->count)
1031    {
1032      first->probability = combined_probability;
1033      second->probability = REG_BR_PROB_BASE - combined_probability;
1034    }
1035}
1036
1037/* Check if T1 and T2 satisfy the IV_COMPARE condition.
   Return the SSA_NAME if the condition is satisfied, NULL otherwise.
1039
1040   T1 and T2 should be one of the following cases:
1041     1. T1 is SSA_NAME, T2 is NULL
1042     2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1043     3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */
1044
1045static tree
1046strips_small_constant (tree t1, tree t2)
1047{
1048  tree ret = NULL;
1049  int value = 0;
1050
1051  if (!t1)
1052    return NULL;
1053  else if (TREE_CODE (t1) == SSA_NAME)
1054    ret = t1;
1055  else if (tree_fits_shwi_p (t1))
1056    value = tree_to_shwi (t1);
1057  else
1058    return NULL;
1059
1060  if (!t2)
1061    return ret;
1062  else if (tree_fits_shwi_p (t2))
1063    value = tree_to_shwi (t2);
1064  else if (TREE_CODE (t2) == SSA_NAME)
1065    {
1066      if (ret)
1067        return NULL;
1068      else
1069        ret = t2;
1070    }
1071
1072  if (value <= 4 && value >= -4)
1073    return ret;
1074  else
1075    return NULL;
1076}
1077
1078/* Return the SSA_NAME in T or T's operands.
1079   Return NULL if SSA_NAME cannot be found.  */
1080
1081static tree
1082get_base_value (tree t)
1083{
1084  if (TREE_CODE (t) == SSA_NAME)
1085    return t;
1086
1087  if (!BINARY_CLASS_P (t))
1088    return NULL;
1089
1090  switch (TREE_OPERAND_LENGTH (t))
1091    {
1092    case 1:
1093      return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1094    case 2:
1095      return strips_small_constant (TREE_OPERAND (t, 0),
1096				    TREE_OPERAND (t, 1));
1097    default:
1098      return NULL;
1099    }
1100}
1101
/* Check the compare STMT in LOOP.  If it compares an induction
   variable to a loop invariant, return true, and save
   LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
   Otherwise return false and set LOOP_INVARIANT to NULL.  */
1106
1107static bool
1108is_comparison_with_loop_invariant_p (gimple stmt, struct loop *loop,
1109				     tree *loop_invariant,
1110				     enum tree_code *compare_code,
1111				     tree *loop_step,
1112				     tree *loop_iv_base)
1113{
1114  tree op0, op1, bound, base;
1115  affine_iv iv0, iv1;
1116  enum tree_code code;
1117  tree step;
1118
1119  code = gimple_cond_code (stmt);
1120  *loop_invariant = NULL;
1121
1122  switch (code)
1123    {
1124    case GT_EXPR:
1125    case GE_EXPR:
1126    case NE_EXPR:
1127    case LT_EXPR:
1128    case LE_EXPR:
1129    case EQ_EXPR:
1130      break;
1131
1132    default:
1133      return false;
1134    }
1135
1136  op0 = gimple_cond_lhs (stmt);
1137  op1 = gimple_cond_rhs (stmt);
1138
1139  if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1140       || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1141    return false;
1142  if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1143    return false;
1144  if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1145    return false;
1146  if (TREE_CODE (iv0.step) != INTEGER_CST
1147      || TREE_CODE (iv1.step) != INTEGER_CST)
1148    return false;
1149  if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1150      || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1151    return false;
1152
1153  if (integer_zerop (iv0.step))
1154    {
1155      if (code != NE_EXPR && code != EQ_EXPR)
1156	code = invert_tree_comparison (code, false);
1157      bound = iv0.base;
1158      base = iv1.base;
1159      if (tree_fits_shwi_p (iv1.step))
1160	step = iv1.step;
1161      else
1162	return false;
1163    }
1164  else
1165    {
1166      bound = iv1.base;
1167      base = iv0.base;
1168      if (tree_fits_shwi_p (iv0.step))
1169	step = iv0.step;
1170      else
1171	return false;
1172    }
1173
1174  if (TREE_CODE (bound) != INTEGER_CST)
1175    bound = get_base_value (bound);
1176  if (!bound)
1177    return false;
1178  if (TREE_CODE (base) != INTEGER_CST)
1179    base = get_base_value (base);
1180  if (!base)
1181    return false;
1182
1183  *loop_invariant = bound;
1184  *compare_code = code;
1185  *loop_step = step;
1186  *loop_iv_base = base;
1187  return true;
1188}
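
/* For example (a sketch, names made up): for the GIMPLE condition
   "if (i_4 < n_7)" inside LOOP, where i_4 is a simple induction variable
   with constant step 1 and n_7 is loop invariant, the function above returns
   true with *LOOP_INVARIANT == n_7, *COMPARE_CODE == LT_EXPR,
   *LOOP_STEP == 1 and *LOOP_IV_BASE set to the base of i_4.  */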
1189
1190/* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */
1191
1192static bool
1193expr_coherent_p (tree t1, tree t2)
1194{
1195  gimple stmt;
1196  tree ssa_name_1 = NULL;
1197  tree ssa_name_2 = NULL;
1198
1199  gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1200  gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1201
1202  if (t1 == t2)
1203    return true;
1204
1205  if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1206    return true;
1207  if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1208    return false;
1209
1210  /* Check to see if t1 is expressed/defined with t2.  */
1211  stmt = SSA_NAME_DEF_STMT (t1);
1212  gcc_assert (stmt != NULL);
1213  if (is_gimple_assign (stmt))
1214    {
1215      ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1216      if (ssa_name_1 && ssa_name_1 == t2)
1217	return true;
1218    }
1219
1220  /* Check to see if t2 is expressed/defined with t1.  */
1221  stmt = SSA_NAME_DEF_STMT (t2);
1222  gcc_assert (stmt != NULL);
1223  if (is_gimple_assign (stmt))
1224    {
1225      ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1226      if (ssa_name_2 && ssa_name_2 == t1)
1227	return true;
1228    }
1229
1230  /* Compare if t1 and t2's def_stmts are identical.  */
1231  if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1232    return true;
1233  else
1234    return false;
1235}
1236
1237/* Predict branch probability of BB when BB contains a branch that compares
1238   an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1239   loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1240
1241   E.g.
1242     for (int i = 0; i < bound; i++) {
1243       if (i < bound - 2)
1244	 computation_1();
1245       else
1246	 computation_2();
1247     }
1248
1249  In this loop, we will predict the branch inside the loop to be taken.  */
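
/* As a numeric sketch of the constant case handled below (illustrative
   numbers only):

     for (i = 0; i < 100; i++)
       if (i < 30)
	 ...

   gives loop_count == (100 - 0) / 1 == 100 and compare_count
   == (30 - 0) / 1 == 30, so the THEN edge of the inner condition is
   predicted with probability 30 * REG_BR_PROB_BASE / 100 == 3000, i.e. taken
   roughly 30% of the time.  */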
1250
1251static void
1252predict_iv_comparison (struct loop *loop, basic_block bb,
1253		       tree loop_bound_var,
1254		       tree loop_iv_base_var,
1255		       enum tree_code loop_bound_code,
1256		       int loop_bound_step)
1257{
1258  gimple stmt;
1259  tree compare_var, compare_base;
1260  enum tree_code compare_code;
1261  tree compare_step_var;
1262  edge then_edge;
1263  edge_iterator ei;
1264
1265  if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1266      || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
1267      || predicted_by_p (bb, PRED_LOOP_EXIT))
1268    return;
1269
1270  stmt = last_stmt (bb);
1271  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1272    return;
1273  if (!is_comparison_with_loop_invariant_p (stmt, loop, &compare_var,
1274					    &compare_code,
1275					    &compare_step_var,
1276					    &compare_base))
1277    return;
1278
1279  /* Find the taken edge.  */
1280  FOR_EACH_EDGE (then_edge, ei, bb->succs)
1281    if (then_edge->flags & EDGE_TRUE_VALUE)
1282      break;
1283
1284  /* When comparing an IV to a loop invariant, NE is more likely to be
1285     taken while EQ is more likely to be not-taken.  */
1286  if (compare_code == NE_EXPR)
1287    {
1288      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1289      return;
1290    }
1291  else if (compare_code == EQ_EXPR)
1292    {
1293      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1294      return;
1295    }
1296
1297  if (!expr_coherent_p (loop_iv_base_var, compare_base))
1298    return;
1299
1300  /* If loop bound, base and compare bound are all constants, we can
1301     calculate the probability directly.  */
1302  if (tree_fits_shwi_p (loop_bound_var)
1303      && tree_fits_shwi_p (compare_var)
1304      && tree_fits_shwi_p (compare_base))
1305    {
1306      int probability;
1307      bool overflow, overall_overflow = false;
1308      widest_int compare_count, tem;
1309
1310      /* (loop_bound - base) / compare_step */
1311      tem = wi::sub (wi::to_widest (loop_bound_var),
1312		     wi::to_widest (compare_base), SIGNED, &overflow);
1313      overall_overflow |= overflow;
1314      widest_int loop_count = wi::div_trunc (tem,
1315					     wi::to_widest (compare_step_var),
1316					     SIGNED, &overflow);
1317      overall_overflow |= overflow;
1318
1319      if (!wi::neg_p (wi::to_widest (compare_step_var))
1320          ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1321	{
1322	  /* (loop_bound - compare_bound) / compare_step */
1323	  tem = wi::sub (wi::to_widest (loop_bound_var),
1324			 wi::to_widest (compare_var), SIGNED, &overflow);
1325	  overall_overflow |= overflow;
1326	  compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1327					 SIGNED, &overflow);
1328	  overall_overflow |= overflow;
1329	}
1330      else
1331        {
1332	  /* (compare_bound - base) / compare_step */
1333	  tem = wi::sub (wi::to_widest (compare_var),
1334			 wi::to_widest (compare_base), SIGNED, &overflow);
1335	  overall_overflow |= overflow;
1336          compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1337					 SIGNED, &overflow);
1338	  overall_overflow |= overflow;
1339	}
1340      if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1341	++compare_count;
1342      if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1343	++loop_count;
1344      if (wi::neg_p (compare_count))
1345        compare_count = 0;
1346      if (wi::neg_p (loop_count))
1347        loop_count = 0;
1348      if (loop_count == 0)
1349	probability = 0;
1350      else if (wi::cmps (compare_count, loop_count) == 1)
1351	probability = REG_BR_PROB_BASE;
1352      else
1353        {
1354	  tem = compare_count * REG_BR_PROB_BASE;
1355	  tem = wi::udiv_trunc (tem, loop_count);
1356	  probability = tem.to_uhwi ();
1357	}
1358
1359      if (!overall_overflow)
1360        predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1361
1362      return;
1363    }
1364
1365  if (expr_coherent_p (loop_bound_var, compare_var))
1366    {
1367      if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1368	  && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1369	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1370      else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1371	       && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1372	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1373      else if (loop_bound_code == NE_EXPR)
1374	{
1375	  /* If the loop backedge condition is "(i != bound)", we do
1376	     the comparison based on the step of IV:
1377	     * step < 0 : backedge condition is like (i > bound)
1378	     * step > 0 : backedge condition is like (i < bound)  */
1379	  gcc_assert (loop_bound_step != 0);
1380	  if (loop_bound_step > 0
1381	      && (compare_code == LT_EXPR
1382		  || compare_code == LE_EXPR))
1383	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1384	  else if (loop_bound_step < 0
1385		   && (compare_code == GT_EXPR
1386		       || compare_code == GE_EXPR))
1387	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1388	  else
1389	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1390	}
1391      else
1392	/* The branch is predicted not-taken if loop_bound_code is
1393	   opposite with compare_code.  */
1394	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1395    }
1396  else if (expr_coherent_p (loop_iv_base_var, compare_var))
1397    {
1398      /* For cases like:
1399	   for (i = s; i < h; i++)
1400	     if (i > s + 2) ....
1401	 The branch should be predicted taken.  */
1402      if (loop_bound_step > 0
1403	  && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1404	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1405      else if (loop_bound_step < 0
1406	       && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1407	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1408      else
1409	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1410    }
1411}
1412
/* Predict extra loop exits that will lead to EXIT_EDGE.  The extra loop
   exits result from short-circuit conditions that will generate an
   if_tmp.  E.g.:
1416
1417   if (foo() || global > 10)
1418     break;
1419
1420   This will be translated into:
1421
1422   BB3:
1423     loop header...
1424   BB4:
1425     if foo() goto BB6 else goto BB5
1426   BB5:
1427     if global > 10 goto BB6 else goto BB7
1428   BB6:
1429     goto BB7
1430   BB7:
1431     iftmp = (PHI 0(BB5), 1(BB6))
1432     if iftmp == 1 goto BB8 else goto BB3
1433   BB8:
1434     outside of the loop...
1435
1436   The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
1437   From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1438   exits. This function takes BB7->BB8 as input, and finds out the extra loop
1439   exits to predict them using PRED_LOOP_EXIT.  */
1440
1441static void
1442predict_extra_loop_exits (edge exit_edge)
1443{
1444  unsigned i;
1445  bool check_value_one;
1446  gimple phi_stmt;
1447  tree cmp_rhs, cmp_lhs;
1448  gimple cmp_stmt = last_stmt (exit_edge->src);
1449
1450  if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1451    return;
1452  cmp_rhs = gimple_cond_rhs (cmp_stmt);
1453  cmp_lhs = gimple_cond_lhs (cmp_stmt);
1454  if (!TREE_CONSTANT (cmp_rhs)
1455      || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1456    return;
1457  if (TREE_CODE (cmp_lhs) != SSA_NAME)
1458    return;
1459
1460  /* If check_value_one is true, only the phi_args with value '1' will lead
1461     to loop exit. Otherwise, only the phi_args with value '0' will lead to
1462     loop exit.  */
1463  check_value_one = (((integer_onep (cmp_rhs))
1464		    ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1465		    ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1466
1467  phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1468  if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI)
1469    return;
1470
1471  for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1472    {
1473      edge e1;
1474      edge_iterator ei;
1475      tree val = gimple_phi_arg_def (phi_stmt, i);
1476      edge e = gimple_phi_arg_edge (phi_stmt, i);
1477
1478      if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1479	continue;
1480      if ((check_value_one ^ integer_onep (val)) == 1)
1481	continue;
1482      if (EDGE_COUNT (e->src->succs) != 1)
1483	{
1484	  predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
1485	  continue;
1486	}
1487
1488      FOR_EACH_EDGE (e1, ei, e->src->preds)
1489	predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
1490    }
1491}
1492
1493/* Predict edge probabilities by exploiting loop structure.  */
1494
1495static void
1496predict_loops (void)
1497{
1498  struct loop *loop;
1499
  /* Try to predict the exit edges and internal branches of each
     natural loop.  */
1502  FOR_EACH_LOOP (loop, 0)
1503    {
1504      basic_block bb, *bbs;
1505      unsigned j, n_exits;
1506      vec<edge> exits;
1507      struct tree_niter_desc niter_desc;
1508      edge ex;
1509      struct nb_iter_bound *nb_iter;
1510      enum tree_code loop_bound_code = ERROR_MARK;
1511      tree loop_bound_step = NULL;
1512      tree loop_bound_var = NULL;
1513      tree loop_iv_base = NULL;
1514      gimple stmt = NULL;
1515
1516      exits = get_loop_exit_edges (loop);
1517      n_exits = exits.length ();
1518      if (!n_exits)
1519	{
1520          exits.release ();
1521	  continue;
1522	}
1523
1524      FOR_EACH_VEC_ELT (exits, j, ex)
1525	{
1526	  tree niter = NULL;
1527	  HOST_WIDE_INT nitercst;
1528	  int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1529	  int probability;
1530	  enum br_predictor predictor;
1531
1532	  predict_extra_loop_exits (ex);
1533
1534	  if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1535	    niter = niter_desc.niter;
1536	  if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1537	    niter = loop_niter_by_eval (loop, ex);
1538
1539	  if (TREE_CODE (niter) == INTEGER_CST)
1540	    {
1541	      if (tree_fits_uhwi_p (niter)
1542		  && max
1543		  && compare_tree_int (niter, max - 1) == -1)
1544		nitercst = tree_to_uhwi (niter) + 1;
1545	      else
1546		nitercst = max;
1547	      predictor = PRED_LOOP_ITERATIONS;
1548	    }
1549	  /* If we have just one exit and we can derive some information about
1550	     the number of iterations of the loop from the statements inside
1551	     the loop, use it to predict this exit.  */
1552	  else if (n_exits == 1)
1553	    {
1554	      nitercst = estimated_stmt_executions_int (loop);
1555	      if (nitercst < 0)
1556		continue;
1557	      if (nitercst > max)
1558		nitercst = max;
1559
1560	      predictor = PRED_LOOP_ITERATIONS_GUESSED;
1561	    }
1562	  else
1563	    continue;
1564
1565	  /* If the prediction for number of iterations is zero, do not
1566	     predict the exit edges.  */
1567	  if (nitercst == 0)
1568	    continue;
1569
1570	  probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
1571	  predict_edge (ex, predictor, probability);
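
	  /* For instance, if nitercst is 10 (a known iteration count of 9),
	     the exit edge gets probability (REG_BR_PROB_BASE + 5) / 10
	     == 1000, i.e. it is predicted taken about 10% of the time on
	     each iteration (illustrative numbers only).  */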
1572	}
1573      exits.release ();
1574
1575      /* Find information about loop bound variables.  */
1576      for (nb_iter = loop->bounds; nb_iter;
1577	   nb_iter = nb_iter->next)
1578	if (nb_iter->stmt
1579	    && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1580	  {
1581	    stmt = nb_iter->stmt;
1582	    break;
1583	  }
1584      if (!stmt && last_stmt (loop->header)
1585	  && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1586	stmt = last_stmt (loop->header);
1587      if (stmt)
1588	is_comparison_with_loop_invariant_p (stmt, loop,
1589					     &loop_bound_var,
1590					     &loop_bound_code,
1591					     &loop_bound_step,
1592					     &loop_iv_base);
1593
1594      bbs = get_loop_body (loop);
1595
1596      for (j = 0; j < loop->num_nodes; j++)
1597	{
1598	  int header_found = 0;
1599	  edge e;
1600	  edge_iterator ei;
1601
1602	  bb = bbs[j];
1603
	  /* Bypass loop heuristics on continue statements.  These
	     statements construct loops via "non-loop" constructs
	     in the source language and are better handled
	     separately.  */
1608	  if (predicted_by_p (bb, PRED_CONTINUE))
1609	    continue;
1610
1611	  /* Loop branch heuristics - predict an edge back to a
1612	     loop's head as taken.  */
1613	  if (bb == loop->latch)
1614	    {
1615	      e = find_edge (loop->latch, loop->header);
1616	      if (e)
1617		{
1618		  header_found = 1;
1619		  predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
1620		}
1621	    }
1622
	  /* Loop exit heuristics - predict as not taken an edge exiting the
	     loop if the conditional has no loop header successors.  */
1625	  if (!header_found
1626	      /* If we already used more reliable loop exit predictors, do not
1627		 bother with PRED_LOOP_EXIT.  */
1628	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1629	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
1630	    {
	      /* For a loop with many exits we don't want to predict all exits
	         with a pretty large probability, because if all exits are
		 considered in a row, the loop would be predicted to iterate
		 almost never.  The code to divide the probability by the
		 number of exits is very rough.  It should compute the number
		 of exits taken in each path through the function (not the
		 overall number of exits, which might be a lot higher for
		 loops with wide switch statements in them) and compute the
		 n-th square root.

		 We limit the minimal probability to 2% to keep
		 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction,
		 as this was causing a regression in the perl benchmark
		 containing such a wide loop.  */
1644
1645	      int probability = ((REG_BR_PROB_BASE
1646		                  - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1647				 / n_exits);
1648	      if (probability < HITRATE (2))
1649		probability = HITRATE (2);
1650	      FOR_EACH_EDGE (e, ei, bb->succs)
1651		if (e->dest->index < NUM_FIXED_BLOCKS
1652		    || !flow_bb_inside_loop_p (loop, e->dest))
1653		  predict_edge (e, PRED_LOOP_EXIT, probability);
1654	    }
1655	  if (loop_bound_var)
1656	    predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1657				   loop_bound_code,
1658				   tree_to_shwi (loop_bound_step));
1659	}
1660
1661      /* Free basic blocks from get_loop_body.  */
1662      free (bbs);
1663    }
1664}
1665
1666/* Attempt to predict probabilities of BB outgoing edges using local
1667   properties.  */
1668static void
1669bb_estimate_probability_locally (basic_block bb)
1670{
1671  rtx last_insn = BB_END (bb);
1672  rtx cond;
1673
1674  if (! can_predict_insn_p (last_insn))
1675    return;
1676  cond = get_condition (last_insn, NULL, false, false);
1677  if (! cond)
1678    return;
1679
1680  /* Try "pointer heuristic."
1681     A comparison ptr == 0 is predicted as false.
1682     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
1683  if (COMPARISON_P (cond)
1684      && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1685	  || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1686    {
1687      if (GET_CODE (cond) == EQ)
1688	predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1689      else if (GET_CODE (cond) == NE)
1690	predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1691    }
1692  else
1693
1694  /* Try "opcode heuristic."
1695     EQ tests are usually false and NE tests are usually true. Also,
1696     most quantities are positive, so we can make the appropriate guesses
1697     about signed comparisons against zero.  */
1698    switch (GET_CODE (cond))
1699      {
1700      case CONST_INT:
1701	/* Unconditional branch.  */
1702	predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1703			  cond == const0_rtx ? NOT_TAKEN : TAKEN);
1704	break;
1705
1706      case EQ:
1707      case UNEQ:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of equality
	   tests in FP code.  */
1711	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1712	  ;
1713	/* Comparisons with 0 are often used for booleans and there is
1714	   nothing useful to predict about them.  */
1715	else if (XEXP (cond, 1) == const0_rtx
1716		 || XEXP (cond, 0) == const0_rtx)
1717	  ;
1718	else
1719	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1720	break;
1721
1722      case NE:
1723      case LTGT:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of equality
	   tests in FP code.  */
1727	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1728	  ;
1729	/* Comparisons with 0 are often used for booleans and there is
1730	   nothing useful to predict about them.  */
1731	else if (XEXP (cond, 1) == const0_rtx
1732		 || XEXP (cond, 0) == const0_rtx)
1733	  ;
1734	else
1735	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1736	break;
1737
1738      case ORDERED:
1739	predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1740	break;
1741
1742      case UNORDERED:
1743	predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1744	break;
1745
1746      case LE:
1747      case LT:
1748	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1749	    || XEXP (cond, 1) == constm1_rtx)
1750	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1751	break;
1752
1753      case GE:
1754      case GT:
1755	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1756	    || XEXP (cond, 1) == constm1_rtx)
1757	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1758	break;
1759
1760      default:
1761	break;
1762      }
1763}
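
/* Rough source-level illustrations of the heuristics above (made-up user
   code, not part of this file):

     if (p == NULL)  ...   pointer heuristic: predicted not taken
     if (a == b)     ...   opcode heuristic (integer, non-zero operands):
			   predicted not taken
     if (n > 0)      ...   opcode heuristic: predicted taken

   Floating point equality tests and EQ/NE comparisons against zero are
   deliberately left unpredicted, as noted in the comments above.  */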
1764
1765/* Set edge->probability for each successor edge of BB.  */
1766void
1767guess_outgoing_edge_probabilities (basic_block bb)
1768{
1769  bb_estimate_probability_locally (bb);
1770  combine_predictions_for_insn (BB_END (bb), bb);
1771}
1772
1773static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);
1774
1775/* Helper function for expr_expected_value.  */
1776
1777static tree
1778expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1779		       tree op1, bitmap visited, enum br_predictor *predictor)
1780{
1781  gimple def;
1782
1783  if (predictor)
1784    *predictor = PRED_UNCONDITIONAL;
1785
1786  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1787    {
1788      if (TREE_CONSTANT (op0))
1789	return op0;
1790
1791      if (code != SSA_NAME)
1792	return NULL_TREE;
1793
1794      def = SSA_NAME_DEF_STMT (op0);
1795
1796      /* If we were already here, break the infinite cycle.  */
1797      if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1798	return NULL;
1799
1800      if (gimple_code (def) == GIMPLE_PHI)
1801	{
	  /* All the arguments of the PHI node must have the same expected
	     constant value.  */
1804	  int i, n = gimple_phi_num_args (def);
1805	  tree val = NULL, new_val;
1806
1807	  for (i = 0; i < n; i++)
1808	    {
1809	      tree arg = PHI_ARG_DEF (def, i);
1810	      enum br_predictor predictor2;
1811
1812	      /* If this PHI has itself as an argument, we cannot
1813		 determine the expected value of this argument.  However,
1814		 if we can find an expected constant value for the other
1815		 PHI args then we can still be sure that this is
1816		 likely a constant.  So be optimistic and just
1817		 continue with the next argument.  */
1818	      if (arg == PHI_RESULT (def))
1819		continue;
1820
1821	      new_val = expr_expected_value (arg, visited, &predictor2);
1822
1823	      /* It is difficult to combine value predictors.  Simply assume
1824		 that the later predictor is weaker and take its prediction.  */
1825	      if (predictor && *predictor < predictor2)
1826		*predictor = predictor2;
1827	      if (!new_val)
1828		return NULL;
1829	      if (!val)
1830		val = new_val;
1831	      else if (!operand_equal_p (val, new_val, false))
1832		return NULL;
1833	    }
1834	  return val;
1835	}
1836      if (is_gimple_assign (def))
1837	{
1838	  if (gimple_assign_lhs (def) != op0)
1839	    return NULL;
1840
1841	  return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1842					gimple_assign_rhs1 (def),
1843					gimple_assign_rhs_code (def),
1844					gimple_assign_rhs2 (def),
1845					visited, predictor);
1846	}
1847
1848      if (is_gimple_call (def))
1849	{
1850	  tree decl = gimple_call_fndecl (def);
1851	  if (!decl)
1852	    {
1853	      if (gimple_call_internal_p (def)
1854		  && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
1855		{
1856		  gcc_assert (gimple_call_num_args (def) == 3);
1857		  tree val = gimple_call_arg (def, 0);
1858		  if (TREE_CONSTANT (val))
1859		    return val;
1860		  if (predictor)
1861		    {
1862		      *predictor = PRED_BUILTIN_EXPECT;
1863		      tree val2 = gimple_call_arg (def, 2);
1864		      gcc_assert (TREE_CODE (val2) == INTEGER_CST
1865				  && tree_fits_uhwi_p (val2)
1866				  && tree_to_uhwi (val2) < END_PREDICTORS);
1867		      *predictor = (enum br_predictor) tree_to_uhwi (val2);
1868		    }
1869		  return gimple_call_arg (def, 1);
1870		}
1871	      return NULL;
1872	    }
1873	  if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
1874	    switch (DECL_FUNCTION_CODE (decl))
1875	      {
1876	      case BUILT_IN_EXPECT:
1877		{
1878		  tree val;
1879		  if (gimple_call_num_args (def) != 2)
1880		    return NULL;
1881		  val = gimple_call_arg (def, 0);
1882		  if (TREE_CONSTANT (val))
1883		    return val;
1884		  if (predictor)
1885		    *predictor = PRED_BUILTIN_EXPECT;
1886		  return gimple_call_arg (def, 1);
1887		}
1888
1889	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
1890	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
1891	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
1892	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
1893	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
1894	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
1895	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
1896	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
1897	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
1898	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
1899	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
1900	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
1901	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
1902		/* Assume that any given atomic operation has low contention,
1903		   and thus the compare-and-swap operation succeeds.  */
1904		if (predictor)
1905		  *predictor = PRED_COMPARE_AND_SWAP;
1906		return boolean_true_node;
1907	    }
1908	}
1909
1910      return NULL;
1911    }
1912
1913  if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1914    {
1915      tree res;
1916      enum br_predictor predictor2;
1917      op0 = expr_expected_value (op0, visited, predictor);
1918      if (!op0)
1919	return NULL;
1920      op1 = expr_expected_value (op1, visited, &predictor2);
1921      if (predictor && *predictor < predictor2)
1922	*predictor = predictor2;
1923      if (!op1)
1924	return NULL;
1925      res = fold_build2 (code, type, op0, op1);
1926      if (TREE_CONSTANT (res))
1927	return res;
1928      return NULL;
1929    }
1930  if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1931    {
1932      tree res;
1933      op0 = expr_expected_value (op0, visited, predictor);
1934      if (!op0)
1935	return NULL;
1936      res = fold_build1 (code, type, op0);
1937      if (TREE_CONSTANT (res))
1938	return res;
1939      return NULL;
1940    }
1941  return NULL;
1942}
1943
1944/* Return the constant that EXPR will likely have at execution time, or
1945   NULL if unknown.  The function is used by the builtin_expect branch
1946   predictor, so the evidence must come from that construct plus any
1947   additional constant folding that is possible.
1948
1949   More involved value guessing (such as value range propagation based
1950   prediction) may be worthwhile, but belongs in a new implementation.  */
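
/* For example (illustrative only; the names are made up), given

     if (__builtin_expect (x > limit, 0))
       overflow_handler ();

   the tested value is routed through a call to __builtin_expect whose
   second argument holds the expected constant, so the value recovered
   here is 0 and the branch is later predicted unlikely to be taken.  */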
1951
1952static tree
1953expr_expected_value (tree expr, bitmap visited,
1954		     enum br_predictor *predictor)
1955{
1956  enum tree_code code;
1957  tree op0, op1;
1958
1959  if (TREE_CONSTANT (expr))
1960    {
1961      if (predictor)
1962	*predictor = PRED_UNCONDITIONAL;
1963      return expr;
1964    }
1965
1966  extract_ops_from_tree (expr, &code, &op0, &op1);
1967  return expr_expected_value_1 (TREE_TYPE (expr),
1968				op0, code, op1, visited, predictor);
1969}
1970
1971/* Predict using opcode of the last statement in basic block.  */
1972static void
1973tree_predict_by_opcode (basic_block bb)
1974{
1975  gimple stmt = last_stmt (bb);
1976  edge then_edge;
1977  tree op0, op1;
1978  tree type;
1979  tree val;
1980  enum tree_code cmp;
1981  bitmap visited;
1982  edge_iterator ei;
1983  enum br_predictor predictor;
1984
1985  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1986    return;
1987  FOR_EACH_EDGE (then_edge, ei, bb->succs)
1988    if (then_edge->flags & EDGE_TRUE_VALUE)
1989      break;
1990  op0 = gimple_cond_lhs (stmt);
1991  op1 = gimple_cond_rhs (stmt);
1992  cmp = gimple_cond_code (stmt);
1993  type = TREE_TYPE (op0);
1994  visited = BITMAP_ALLOC (NULL);
1995  val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,
1996			       &predictor);
1997  BITMAP_FREE (visited);
1998  if (val && TREE_CODE (val) == INTEGER_CST)
1999    {
2000      if (predictor == PRED_BUILTIN_EXPECT)
2001	{
2002	  int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
2003
2004	  gcc_assert (percent >= 0 && percent <= 100);
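	  /* For instance (illustrative only), with the parameter set to 90,
	     __builtin_expect (cond, 0) leaves only a 10% probability that
	     the THEN edge is taken.  */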
2005	  if (integer_zerop (val))
2006	    percent = 100 - percent;
2007	  predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
2008	}
2009      else
2010	predict_edge (then_edge, predictor,
2011		      integer_zerop (val) ? NOT_TAKEN : TAKEN);
2012    }
2013  /* Try "pointer heuristic."
2014     A comparison ptr == 0 is predicted as false.
2015     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
2016  if (POINTER_TYPE_P (type))
2017    {
2018      if (cmp == EQ_EXPR)
2019	predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
2020      else if (cmp == NE_EXPR)
2021	predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
2022    }
2023  else
2024
2025  /* Try "opcode heuristic."
2026     EQ tests are usually false and NE tests are usually true. Also,
2027     most quantities are positive, so we can make the appropriate guesses
2028     about signed comparisons against zero.  */
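  /* For example (illustrative only; the names are made up), in

       if (len > 0)
         process (buf, len);

     the GT_EXPR test against zero is guessed taken by the "opcode
     positive" heuristic below, while an equality test between two values
     that are not literal zero is guessed not taken.  */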
2029    switch (cmp)
2030      {
2031      case EQ_EXPR:
2032      case UNEQ_EXPR:
2033	/* Floating point comparisons appear to behave in a very
2034	   unpredictable way because of the special role of equality
2035	   tests in FP code.  */
2036	if (FLOAT_TYPE_P (type))
2037	  ;
2038	/* Comparisons with 0 are often used for booleans and there is
2039	   nothing useful to predict about them.  */
2040	else if (integer_zerop (op0) || integer_zerop (op1))
2041	  ;
2042	else
2043	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2044	break;
2045
2046      case NE_EXPR:
2047      case LTGT_EXPR:
2048	/* Floating point comparisons appear to behave in a very
2049	   unpredictable way because of the special role of equality
2050	   tests in FP code.  */
2051	if (FLOAT_TYPE_P (type))
2052	  ;
2053	/* Comparisons with 0 are often used for booleans and there is
2054	   nothing useful to predict about them.  */
2055	else if (integer_zerop (op0)
2056		 || integer_zerop (op1))
2057	  ;
2058	else
2059	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2060	break;
2061
2062      case ORDERED_EXPR:
2063	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2064	break;
2065
2066      case UNORDERED_EXPR:
2067	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2068	break;
2069
2070      case LE_EXPR:
2071      case LT_EXPR:
2072	if (integer_zerop (op1)
2073	    || integer_onep (op1)
2074	    || integer_all_onesp (op1)
2075	    || real_zerop (op1)
2076	    || real_onep (op1)
2077	    || real_minus_onep (op1))
2078	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2079	break;
2080
2081      case GE_EXPR:
2082      case GT_EXPR:
2083	if (integer_zerop (op1)
2084	    || integer_onep (op1)
2085	    || integer_all_onesp (op1)
2086	    || real_zerop (op1)
2087	    || real_onep (op1)
2088	    || real_minus_onep (op1))
2089	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2090	break;
2091
2092      default:
2093	break;
2094      }
2095}
2096
2097/* Try to guess whether the value of return means error code.  */
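
/* For example (illustrative only; the names are made up), in

     struct node *lookup (int key)
     {
       struct node *n = find_slot (key);
       if (!n)
	 return NULL;
       return n;
     }

   the paths leading to `return NULL' are marked unlikely
   (PRED_NULL_RETURN), and a negative integer constant return value
   triggers PRED_NEGATIVE_RETURN in the same way.  */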
2098
2099static enum br_predictor
2100return_prediction (tree val, enum prediction *prediction)
2101{
2102  /* VOID.  */
2103  if (!val)
2104    return PRED_NO_PREDICTION;
2105  /* Different heuristics for pointers and scalars.  */
2106  if (POINTER_TYPE_P (TREE_TYPE (val)))
2107    {
2108      /* NULL is usually not returned.  */
2109      if (integer_zerop (val))
2110	{
2111	  *prediction = NOT_TAKEN;
2112	  return PRED_NULL_RETURN;
2113	}
2114    }
2115  else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2116    {
2117      /* Negative return values are often used to indicate
2118         errors.  */
2119      if (TREE_CODE (val) == INTEGER_CST
2120	  && tree_int_cst_sgn (val) < 0)
2121	{
2122	  *prediction = NOT_TAKEN;
2123	  return PRED_NEGATIVE_RETURN;
2124	}
2125      /* Constant return values seem to be commonly taken.
2126         Zero/one often represent booleans, so exclude them from the
2127	 heuristics.  */
2128      if (TREE_CONSTANT (val)
2129	  && (!integer_zerop (val) && !integer_onep (val)))
2130	{
2131	  *prediction = TAKEN;
2132	  return PRED_CONST_RETURN;
2133	}
2134    }
2135  return PRED_NO_PREDICTION;
2136}
2137
2138/* Find the basic block with the return expression and look for a possible
2139   return value, trying to apply the return prediction heuristics.  */
2140static void
2141apply_return_prediction (void)
2142{
2143  gimple return_stmt = NULL;
2144  tree return_val;
2145  edge e;
2146  gimple phi;
2147  int phi_num_args, i;
2148  enum br_predictor pred;
2149  enum prediction direction;
2150  edge_iterator ei;
2151
2152  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2153    {
2154      return_stmt = last_stmt (e->src);
2155      if (return_stmt
2156	  && gimple_code (return_stmt) == GIMPLE_RETURN)
2157	break;
2158    }
2159  if (!e)
2160    return;
2161  return_val = gimple_return_retval (return_stmt);
2162  if (!return_val)
2163    return;
2164  if (TREE_CODE (return_val) != SSA_NAME
2165      || !SSA_NAME_DEF_STMT (return_val)
2166      || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2167    return;
2168  phi = SSA_NAME_DEF_STMT (return_val);
2169  phi_num_args = gimple_phi_num_args (phi);
2170  pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2171
2172  /* Avoid the degenerate case where all return values from the function
2173     belong to the same category (i.e. they are all positive constants),
2174     since then we can hardly say anything useful about them.  */
2175  for (i = 1; i < phi_num_args; i++)
2176    if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2177      break;
2178  if (i != phi_num_args)
2179    for (i = 0; i < phi_num_args; i++)
2180      {
2181	pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2182	if (pred != PRED_NO_PREDICTION)
2183	  predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2184				         direction);
2185      }
2186}
2187
2188/* Look for basic blocks that contain unlikely-to-happen events
2189   (such as noreturn calls) and mark all paths leading to the execution
2190   of these basic blocks as unlikely.  */
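
/* For example (illustrative only), a block containing a call to abort ()
   or to a function declared with __attribute__ ((cold)) causes the
   branches on the paths leading to it to be predicted against reaching
   the block.  */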
2191
2192static void
2193tree_bb_level_predictions (void)
2194{
2195  basic_block bb;
2196  bool has_return_edges = false;
2197  edge e;
2198  edge_iterator ei;
2199
2200  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2201    if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2202      {
2203        has_return_edges = true;
2204	break;
2205      }
2206
2207  apply_return_prediction ();
2208
2209  FOR_EACH_BB_FN (bb, cfun)
2210    {
2211      gimple_stmt_iterator gsi;
2212
2213      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2214	{
2215	  gimple stmt = gsi_stmt (gsi);
2216	  tree decl;
2217
2218	  if (is_gimple_call (stmt))
2219	    {
2220	      if ((gimple_call_flags (stmt) & ECF_NORETURN)
2221	          && has_return_edges)
2222		predict_paths_leading_to (bb, PRED_NORETURN,
2223					  NOT_TAKEN);
2224	      decl = gimple_call_fndecl (stmt);
2225	      if (decl
2226		  && lookup_attribute ("cold",
2227				       DECL_ATTRIBUTES (decl)))
2228		predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2229					  NOT_TAKEN);
2230	    }
2231	  else if (gimple_code (stmt) == GIMPLE_PREDICT)
2232	    {
2233	      predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2234					gimple_predict_outcome (stmt));
2235	      /* Keep GIMPLE_PREDICT around so early inlining will propagate
2236	         hints to callers.  */
2237	    }
2238	}
2239    }
2240}
2241
2242#ifdef ENABLE_CHECKING
2243
2244/* Callback for pointer_map_traverse, asserts that the pointer map is
2245   empty.  */
2246
2247static bool
2248assert_is_empty (const void *key ATTRIBUTE_UNUSED, void **value,
2249		 void *data ATTRIBUTE_UNUSED)
2250{
2251  gcc_assert (!*value);
2252  return false;
2253}
2254#endif
2255
2256/* Predict branch probabilities and estimate profile for basic block BB.  */
2257
2258static void
2259tree_estimate_probability_bb (basic_block bb)
2260{
2261  edge e;
2262  edge_iterator ei;
2263  gimple last;
2264
2265  FOR_EACH_EDGE (e, ei, bb->succs)
2266    {
2267      /* Predict edges to user labels with attributes.  */
2268      if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
2269	{
2270	  gimple_stmt_iterator gi;
2271	  for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2272	    {
2273	      gimple stmt = gsi_stmt (gi);
2274	      tree decl;
2275
2276	      if (gimple_code (stmt) != GIMPLE_LABEL)
2277		break;
2278	      decl = gimple_label_label (stmt);
2279	      if (DECL_ARTIFICIAL (decl))
2280		continue;
2281
2282	      /* Finally, we have a user-defined label.  */
2283	      if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2284		predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2285	      else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2286		predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2287	    }
2288	}
2289
2290      /* Predict early returns to be probable, as we've already taken
2291	 care of error returns and the other cases are often used for
2292	 fast paths through the function.
2293
2294	 Since we've already removed the return statements, we are
2295	 looking for CFG like:
2296
2297	 if (conditional)
2298	 {
2299	 ..
2300	 goto return_block
2301	 }
2302	 some other blocks
2303	 return_block:
2304	 return_stmt.  */
2305      if (e->dest != bb->next_bb
2306	  && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2307	  && single_succ_p (e->dest)
2308	  && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2309	  && (last = last_stmt (e->dest)) != NULL
2310	  && gimple_code (last) == GIMPLE_RETURN)
2311	{
2312	  edge e1;
2313	  edge_iterator ei1;
2314
2315	  if (single_succ_p (bb))
2316	    {
2317	      FOR_EACH_EDGE (e1, ei1, bb->preds)
2318		if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2319		    && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2320		    && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2321		  predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2322	    }
2323	  else
2324	    if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2325		&& !predicted_by_p (e->src, PRED_CONST_RETURN)
2326		&& !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2327	      predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2328	}
2329
2330      /* Look for a block we are guarding (i.e. we dominate it,
2331	 but it doesn't postdominate us).  */
2332      if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
2333	  && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2334	  && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2335	{
2336	  gimple_stmt_iterator bi;
2337
2338	  /* The call heuristic claims that a guarded function call
2339	     is improbable.  This is because such calls are often used
2340	     to signal exceptional situations such as printing error
2341	     messages.  */
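	  /* E.g. (illustrative only; the names are made up) in

	       if (fd < 0)
		 report_error ("open failed");

	     the edge into the guarded block containing the call is
	     predicted not taken.  */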
2342	  for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2343	       gsi_next (&bi))
2344	    {
2345	      gimple stmt = gsi_stmt (bi);
2346	      if (is_gimple_call (stmt)
2347		  /* Constant and pure calls are hardly ever used to signal
2348		     something exceptional.  */
2349		  && gimple_has_side_effects (stmt))
2350		{
2351		  predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2352		  break;
2353		}
2354	    }
2355	}
2356    }
2357  tree_predict_by_opcode (bb);
2358}
2359
2360/* Predict branch probabilities and estimate profile of the tree CFG.
2361   This function can be called from the loop optimizers to recompute
2362   the profile information.  */
2363
2364void
2365tree_estimate_probability (void)
2366{
2367  basic_block bb;
2368
2369  add_noreturn_fake_exit_edges ();
2370  connect_infinite_loops_to_exit ();
2371  /* We use loop_niter_by_eval, which requires that the loops have
2372     preheaders.  */
2373  create_preheaders (CP_SIMPLE_PREHEADERS);
2374  calculate_dominance_info (CDI_POST_DOMINATORS);
2375
2376  bb_predictions = pointer_map_create ();
2377  tree_bb_level_predictions ();
2378  record_loop_exits ();
2379
2380  if (number_of_loops (cfun) > 1)
2381    predict_loops ();
2382
2383  FOR_EACH_BB_FN (bb, cfun)
2384    tree_estimate_probability_bb (bb);
2385
2386  FOR_EACH_BB_FN (bb, cfun)
2387    combine_predictions_for_bb (bb);
2388
2389#ifdef ENABLE_CHECKING
2390  pointer_map_traverse (bb_predictions, assert_is_empty, NULL);
2391#endif
2392  pointer_map_destroy (bb_predictions);
2393  bb_predictions = NULL;
2394
2395  estimate_bb_frequencies (false);
2396  free_dominance_info (CDI_POST_DOMINATORS);
2397  remove_fake_exit_edges ();
2398}
2399
2400/* Predict, using PRED, the edges to successors of CUR whose sources are not
2401   postdominated by BB, and recurse to all postdominators.  */
2402
2403static void
2404predict_paths_for_bb (basic_block cur, basic_block bb,
2405		      enum br_predictor pred,
2406		      enum prediction taken,
2407		      bitmap visited)
2408{
2409  edge e;
2410  edge_iterator ei;
2411  basic_block son;
2412
2413  /* We are looking for all edges forming the edge cut induced by
2414     the set of all blocks postdominated by BB.  */
2415  FOR_EACH_EDGE (e, ei, cur->preds)
2416    if (e->src->index >= NUM_FIXED_BLOCKS
2417	&& !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2418    {
2419      edge e2;
2420      edge_iterator ei2;
2421      bool found = false;
2422
2423      /* Ignore fake and EH edges; we predict them as not taken anyway.  */
2424      if (e->flags & (EDGE_EH | EDGE_FAKE))
2425	continue;
2426      gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2427
2428      /* See if there is an edge from e->src that is not abnormal
2429	 and does not lead to BB.  */
2430      FOR_EACH_EDGE (e2, ei2, e->src->succs)
2431	if (e2 != e
2432	    && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2433	    && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2434	  {
2435	    found = true;
2436	    break;
2437	  }
2438
2439      /* If there is a non-abnormal path leaving e->src, predict the edge
2440	 using the predictor.  Otherwise we need to look for paths
2441	 leading to e->src.
2442
2443	 The second case may lead to an infinite loop when we are predicting
2444	 regions that are only reachable by abnormal edges.  We simply
2445	 prevent visiting a given BB twice.  */
2446      if (found)
2447        predict_edge_def (e, pred, taken);
2448      else if (bitmap_set_bit (visited, e->src->index))
2449	predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2450    }
2451  for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2452       son;
2453       son = next_dom_son (CDI_POST_DOMINATORS, son))
2454    predict_paths_for_bb (son, bb, pred, taken, visited);
2455}
2456
2457/* Set the branch probabilities on all paths leading to BB according to
2458   predictor PRED and outcome TAKEN.  */
2459
2460static void
2461predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2462			  enum prediction taken)
2463{
2464  bitmap visited = BITMAP_ALLOC (NULL);
2465  predict_paths_for_bb (bb, bb, pred, taken, visited);
2466  BITMAP_FREE (visited);
2467}
2468
2469/* Like predict_paths_leading_to, but take an edge instead of a basic block.  */
2470
2471static void
2472predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2473			       enum prediction taken)
2474{
2475  bool has_nonloop_edge = false;
2476  edge_iterator ei;
2477  edge e2;
2478
2479  basic_block bb = e->src;
2480  FOR_EACH_EDGE (e2, ei, bb->succs)
2481    if (e2->dest != e->src && e2->dest != e->dest
2482	&& !(e->flags & (EDGE_EH | EDGE_FAKE))
2483	&& !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2484      {
2485	has_nonloop_edge = true;
2486	break;
2487      }
2488  if (!has_nonloop_edge)
2489    {
2490      bitmap visited = BITMAP_ALLOC (NULL);
2491      predict_paths_for_bb (bb, bb, pred, taken, visited);
2492      BITMAP_FREE (visited);
2493    }
2494  else
2495    predict_edge_def (e, pred, taken);
2496}
2497
2498/* This is used to carry information about basic blocks.  It is
2499   attached to the AUX field of the standard CFG block.  */
2500
2501typedef struct block_info_def
2502{
2503  /* Estimated frequency of execution of basic_block.  */
2504  sreal frequency;
2505
2506  /* To keep queue of basic blocks to process.  */
2507  basic_block next;
2508
2509  /* Number of predecessors we need to visit first.  */
2510  int npredecessors;
2511} *block_info;
2512
2513/* Similar information for edges.  */
2514typedef struct edge_info_def
2515{
2516  /* In case the edge is a loopback edge, the probability that the edge will
2517     be reached provided the header is.  The estimated number of iterations
2518     of the loop can then be computed as 1 / (1 - back_edge_prob).  */
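  /* For example (illustrative only), back_edge_prob == 0.9 corresponds to
     an estimated 1 / (1 - 0.9) = 10 iterations of the loop.  */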
2519  sreal back_edge_prob;
2520  /* True if the edge is a loopback edge in the natural loop.  */
2521  unsigned int back_edge:1;
2522} *edge_info;
2523
2524#define BLOCK_INFO(B)	((block_info) (B)->aux)
2525#define EDGE_INFO(E)	((edge_info) (E)->aux)
2526
2527/* Helper function for estimate_bb_frequencies.
2528   Propagate the frequencies in blocks marked in
2529   TOVISIT, starting in HEAD.  */
2530
2531static void
2532propagate_freq (basic_block head, bitmap tovisit)
2533{
2534  basic_block bb;
2535  basic_block last;
2536  unsigned i;
2537  edge e;
2538  basic_block nextbb;
2539  bitmap_iterator bi;
2540
2541  /* For each basic block we need to visit, count the number of its
2542     predecessors that we need to visit first.  */
2543  EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2544    {
2545      edge_iterator ei;
2546      int count = 0;
2547
2548      bb = BASIC_BLOCK_FOR_FN (cfun, i);
2549
2550      FOR_EACH_EDGE (e, ei, bb->preds)
2551	{
2552	  bool visit = bitmap_bit_p (tovisit, e->src->index);
2553
2554	  if (visit && !(e->flags & EDGE_DFS_BACK))
2555	    count++;
2556	  else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2557	    fprintf (dump_file,
2558		     "Irreducible region hit, ignoring edge %i->%i\n",
2559		     e->src->index, bb->index);
2560	}
2561      BLOCK_INFO (bb)->npredecessors = count;
2562      /* When the function never returns, we will never process the exit block.  */
2563      if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
2564	bb->count = bb->frequency = 0;
2565    }
2566
2567  memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
2568  last = head;
2569  for (bb = head; bb; bb = nextbb)
2570    {
2571      edge_iterator ei;
2572      sreal cyclic_probability, frequency;
2573
2574      memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
2575      memcpy (&frequency, &real_zero, sizeof (real_zero));
2576
2577      nextbb = BLOCK_INFO (bb)->next;
2578      BLOCK_INFO (bb)->next = NULL;
2579
2580      /* Compute frequency of basic block.  */
2581      if (bb != head)
2582	{
2583#ifdef ENABLE_CHECKING
2584	  FOR_EACH_EDGE (e, ei, bb->preds)
2585	    gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2586			|| (e->flags & EDGE_DFS_BACK));
2587#endif
2588
2589	  FOR_EACH_EDGE (e, ei, bb->preds)
2590	    if (EDGE_INFO (e)->back_edge)
2591	      {
2592		sreal_add (&cyclic_probability, &cyclic_probability,
2593			   &EDGE_INFO (e)->back_edge_prob);
2594	      }
2595	    else if (!(e->flags & EDGE_DFS_BACK))
2596	      {
2597		sreal tmp;
2598
2599		/*  frequency += (e->probability
2600				  * BLOCK_INFO (e->src)->frequency /
2601				  REG_BR_PROB_BASE);  */
2602
2603		sreal_init (&tmp, e->probability, 0);
2604		sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
2605		sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
2606		sreal_add (&frequency, &frequency, &tmp);
2607	      }
2608
2609	  if (sreal_compare (&cyclic_probability, &real_zero) == 0)
2610	    {
2611	      memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
2612		      sizeof (frequency));
2613	    }
2614	  else
2615	    {
2616	      if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
2617		{
2618		  memcpy (&cyclic_probability, &real_almost_one,
2619			  sizeof (real_almost_one));
2620		}
2621
2622	      /* BLOCK_INFO (bb)->frequency = frequency
2623					      / (1 - cyclic_probability) */
2624
2625	      sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
2626	      sreal_div (&BLOCK_INFO (bb)->frequency,
2627			 &frequency, &cyclic_probability);
2628	    }
2629	}
2630
2631      bitmap_clear_bit (tovisit, bb->index);
2632
2633      e = find_edge (bb, head);
2634      if (e)
2635	{
2636	  sreal tmp;
2637
2638	  /* EDGE_INFO (e)->back_edge_prob
2639	     = ((e->probability * BLOCK_INFO (bb)->frequency)
2640	     / REG_BR_PROB_BASE); */
2641
2642	  sreal_init (&tmp, e->probability, 0);
2643	  sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
2644	  sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2645		     &tmp, &real_inv_br_prob_base);
2646	}
2647
2648      /* Propagate to successor blocks.  */
2649      FOR_EACH_EDGE (e, ei, bb->succs)
2650	if (!(e->flags & EDGE_DFS_BACK)
2651	    && BLOCK_INFO (e->dest)->npredecessors)
2652	  {
2653	    BLOCK_INFO (e->dest)->npredecessors--;
2654	    if (!BLOCK_INFO (e->dest)->npredecessors)
2655	      {
2656		if (!nextbb)
2657		  nextbb = e->dest;
2658		else
2659		  BLOCK_INFO (last)->next = e->dest;
2660
2661		last = e->dest;
2662	      }
2663	  }
2664    }
2665}
2666
2667/* Estimate frequencies in loops at the same nest level.  */
2668
2669static void
2670estimate_loops_at_level (struct loop *first_loop)
2671{
2672  struct loop *loop;
2673
2674  for (loop = first_loop; loop; loop = loop->next)
2675    {
2676      edge e;
2677      basic_block *bbs;
2678      unsigned i;
2679      bitmap tovisit = BITMAP_ALLOC (NULL);
2680
2681      estimate_loops_at_level (loop->inner);
2682
2683      /* Find current loop back edge and mark it.  */
2684      e = loop_latch_edge (loop);
2685      EDGE_INFO (e)->back_edge = 1;
2686
2687      bbs = get_loop_body (loop);
2688      for (i = 0; i < loop->num_nodes; i++)
2689	bitmap_set_bit (tovisit, bbs[i]->index);
2690      free (bbs);
2691      propagate_freq (loop->header, tovisit);
2692      BITMAP_FREE (tovisit);
2693    }
2694}
2695
2696/* Propagate frequencies through the structure of loops.  */
2697
2698static void
2699estimate_loops (void)
2700{
2701  bitmap tovisit = BITMAP_ALLOC (NULL);
2702  basic_block bb;
2703
2704  /* Start by estimating the frequencies in the loops.  */
2705  if (number_of_loops (cfun) > 1)
2706    estimate_loops_at_level (current_loops->tree_root->inner);
2707
2708  /* Now propagate the frequencies through all the blocks.  */
2709  FOR_ALL_BB_FN (bb, cfun)
2710    {
2711      bitmap_set_bit (tovisit, bb->index);
2712    }
2713  propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
2714  BITMAP_FREE (tovisit);
2715}
2716
2717/* Drop the profile for NODE to guessed, and update its frequency based on
2718   whether it is expected to be hot given the CALL_COUNT.  */
2719
2720static void
2721drop_profile (struct cgraph_node *node, gcov_type call_count)
2722{
2723  struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2724  /* In the case where this was called by another function with a
2725     dropped profile, call_count will be 0. Since there are no
2726     non-zero call counts to this function, we don't know for sure
2727     whether it is hot, and therefore it will be marked normal below.  */
2728  bool hot = maybe_hot_count_p (NULL, call_count);
2729
2730  if (dump_file)
2731    fprintf (dump_file,
2732             "Dropping 0 profile for %s/%i. %s based on calls.\n",
2733             node->name (), node->order,
2734             hot ? "Function is hot" : "Function is normal");
2735  /* We only expect to miss profiles for functions that are reached
2736     via non-zero call edges in cases where the function may have
2737     been linked from another module or library (COMDATs and extern
2738     templates). See the comments below for handle_missing_profiles.
2739     Also, only warn in cases where the missing counts exceed the
2740     number of training runs. In certain cases with an execv followed
2741     by a no-return call the profile for the no-return call is not
2742     dumped and there can be a mismatch.  */
2743  if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
2744      && call_count > profile_info->runs)
2745    {
2746      if (flag_profile_correction)
2747        {
2748          if (dump_file)
2749            fprintf (dump_file,
2750                     "Missing counts for called function %s/%i\n",
2751                     node->name (), node->order);
2752        }
2753      else
2754        warning (0, "Missing counts for called function %s/%i",
2755                 node->name (), node->order);
2756    }
2757
2758  profile_status_for_fn (fn)
2759      = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
2760  node->frequency
2761      = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
2762}
2763
2764/* In the case of COMDAT routines, multiple object files will contain the same
2765   function and the linker will select one for the binary. In that case
2766   all the other copies from the profile-instrumented binary will be missing
2767   profile counts. Look for cases where this happened, due to non-zero
2768   call counts going to 0-count functions, and drop the profile to guessed
2769   so that we can use the estimated probabilities and avoid optimizing only
2770   for size.
2771
2772   The other case where the profile may be missing is when the routine
2773   is not going to be emitted to the object file, e.g. for "extern template"
2774   class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2775   all other cases of non-zero calls to 0-count functions.  */
2776
2777void
2778handle_missing_profiles (void)
2779{
2780  struct cgraph_node *node;
2781  int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2782  vec<struct cgraph_node *> worklist;
2783  worklist.create (64);
2784
2785  /* See if a 0-count function has non-0-count callers.  In this case we
2786     lost some profile.  Drop its function profile to PROFILE_GUESSED.  */
2787  FOR_EACH_DEFINED_FUNCTION (node)
2788    {
2789      struct cgraph_edge *e;
2790      gcov_type call_count = 0;
2791      gcov_type max_tp_first_run = 0;
2792      struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2793
2794      if (node->count)
2795        continue;
2796      for (e = node->callers; e; e = e->next_caller)
2797      {
2798        call_count += e->count;
2799
2800	if (e->caller->tp_first_run > max_tp_first_run)
2801	  max_tp_first_run = e->caller->tp_first_run;
2802      }
2803
2804      /* If the time profile is missing, assign the maximum that comes from
2805	 the caller functions.  */
2806      if (!node->tp_first_run && max_tp_first_run)
2807	node->tp_first_run = max_tp_first_run + 1;
2808
2809      if (call_count
2810          && fn && fn->cfg
2811          && (call_count * unlikely_count_fraction >= profile_info->runs))
2812        {
2813          drop_profile (node, call_count);
2814          worklist.safe_push (node);
2815        }
2816    }
2817
2818  /* Propagate the profile dropping to other 0-count COMDATs that are
2819     potentially called by COMDATs we already dropped the profile on.  */
2820  while (worklist.length () > 0)
2821    {
2822      struct cgraph_edge *e;
2823
2824      node = worklist.pop ();
2825      for (e = node->callees; e; e = e->next_callee)
2826        {
2827          struct cgraph_node *callee = e->callee;
2828          struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
2829
2830          if (callee->count > 0)
2831            continue;
2832          if (DECL_COMDAT (callee->decl) && fn && fn->cfg
2833              && profile_status_for_fn (fn) == PROFILE_READ)
2834            {
2835              drop_profile (callee, 0);
2836              worklist.safe_push (callee);
2837            }
2838        }
2839    }
2840  worklist.release ();
2841}
2842
2843/* Convert counts measured by profile driven feedback to frequencies.
2844   Return nonzero iff there was any nonzero execution count.  */
2845
2846int
2847counts_to_freqs (void)
2848{
2849  gcov_type count_max, true_count_max = 0;
2850  basic_block bb;
2851
2852  /* Don't overwrite the estimated frequencies when the profile for
2853     the function is missing.  We may drop this function to PROFILE_GUESSED
2854     later in drop_profile ().  */
2855  if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
2856    return 0;
2857
2858  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2859    true_count_max = MAX (bb->count, true_count_max);
2860
2861  count_max = MAX (true_count_max, 1);
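  /* Scale the counts linearly into the range [0, BB_FREQ_MAX], rounding to
     the nearest integer, so the block with the largest count gets
     frequency BB_FREQ_MAX.  */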
2862  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2863    bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
2864
2865  return true_count_max;
2866}
2867
2868/* Return true if the function is likely to be expensive, so there is no
2869   point in optimizing the prologue, epilogue or inlining at the expense of
2870   code size growth.  THRESHOLD is the limit on the average number of
2871   instructions the function may execute and still be considered cheap.  */
2872
2873bool
2874expensive_function_p (int threshold)
2875{
2876  unsigned int sum = 0;
2877  basic_block bb;
2878  unsigned int limit;
2879
2880  /* We cannot compute this accurately for large thresholds due to the
2881     scaled frequencies.  */
2882  gcc_assert (threshold <= BB_FREQ_MAX);
2883
2884  /* Frequencies are out of range.  This either means that the function
2885     contains an internal loop executing more than BB_FREQ_MAX times or
2886     that profile feedback is available but the function was never executed.  */
2887  if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
2888    return true;
2889
2890  /* The limit is at most BB_FREQ_MAX^2, so overflow won't happen.  */
2891  limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
2892  FOR_EACH_BB_FN (bb, cfun)
2893    {
2894      rtx insn;
2895
2896      FOR_BB_INSNS (bb, insn)
2897	if (active_insn_p (insn))
2898	  {
2899	    sum += bb->frequency;
2900	    if (sum > limit)
2901	      return true;
2902	  }
2903    }
2904
2905  return false;
2906}
2907
2908/* Estimate and propagate basic block frequencies using the given branch
2909   probabilities.  If FORCE is true, the frequencies are used to estimate
2910   the counts even when there are already non-zero profile counts.  */
2911
2912void
2913estimate_bb_frequencies (bool force)
2914{
2915  basic_block bb;
2916  sreal freq_max;
2917
2918  if (force || profile_status_for_fn (cfun) != PROFILE_READ || !counts_to_freqs ())
2919    {
2920      static int real_values_initialized = 0;
2921
2922      if (!real_values_initialized)
2923        {
2924	  real_values_initialized = 1;
2925	  sreal_init (&real_zero, 0, 0);
2926	  sreal_init (&real_one, 1, 0);
2927	  sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
2928	  sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
2929	  sreal_init (&real_one_half, 1, -1);
2930	  sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
2931	  sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
2932	}
2933
2934      mark_dfs_back_edges ();
2935
2936      single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
2937	 REG_BR_PROB_BASE;
2938
2939      /* Set up block info for each basic block.  */
2940      alloc_aux_for_blocks (sizeof (struct block_info_def));
2941      alloc_aux_for_edges (sizeof (struct edge_info_def));
2942      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2943	{
2944	  edge e;
2945	  edge_iterator ei;
2946
2947	  FOR_EACH_EDGE (e, ei, bb->succs)
2948	    {
2949	      sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
2950	      sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2951			 &EDGE_INFO (e)->back_edge_prob,
2952			 &real_inv_br_prob_base);
2953	    }
2954	}
2955
2956      /* First compute frequencies locally for each loop from innermost
2957         to outermost to examine frequencies for back edges.  */
2958      estimate_loops ();
2959
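      /* Find the largest propagated frequency and rescale all blocks so
	 that the most frequent one gets frequency BB_FREQ_MAX.  */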
2960      memcpy (&freq_max, &real_zero, sizeof (real_zero));
2961      FOR_EACH_BB_FN (bb, cfun)
2962	if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
2963	  memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));
2964
2965      sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
2966      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2967	{
2968	  sreal tmp;
2969
2970	  sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
2971	  sreal_add (&tmp, &tmp, &real_one_half);
2972	  bb->frequency = sreal_to_int (&tmp);
2973	}
2974
2975      free_aux_for_blocks ();
2976      free_aux_for_edges ();
2977    }
2978  compute_function_frequency ();
2979}
2980
2981/* Decide whether function is hot, cold or unlikely executed.  */
2982void
2983compute_function_frequency (void)
2984{
2985  basic_block bb;
2986  struct cgraph_node *node = cgraph_get_node (current_function_decl);
2987
2988  if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2989      || MAIN_NAME_P (DECL_NAME (current_function_decl)))
2990    node->only_called_at_startup = true;
2991  if (DECL_STATIC_DESTRUCTOR (current_function_decl))
2992    node->only_called_at_exit = true;
2993
2994  if (profile_status_for_fn (cfun) != PROFILE_READ)
2995    {
2996      int flags = flags_from_decl_or_type (current_function_decl);
2997      if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
2998	  != NULL)
2999        node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3000      else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
3001	       != NULL)
3002        node->frequency = NODE_FREQUENCY_HOT;
3003      else if (flags & ECF_NORETURN)
3004        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3005      else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
3006        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3007      else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
3008	       || DECL_STATIC_DESTRUCTOR (current_function_decl))
3009        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3010      return;
3011    }
3012
3013  /* Only the first time try to drop the function into unlikely executed.
3014     After inlining, the roundoff errors may confuse us.
3015     The ipa-profile pass will drop functions only called from unlikely
3016     functions to unlikely, and that is most of what we care about.  */
3017  if (!cfun->after_inlining)
3018    node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3019  FOR_EACH_BB_FN (bb, cfun)
3020    {
3021      if (maybe_hot_bb_p (cfun, bb))
3022	{
3023	  node->frequency = NODE_FREQUENCY_HOT;
3024	  return;
3025	}
3026      if (!probably_never_executed_bb_p (cfun, bb))
3027	node->frequency = NODE_FREQUENCY_NORMAL;
3028    }
3029}
3030
3031/* Build PREDICT_EXPR.  */
3032tree
3033build_predict_expr (enum br_predictor predictor, enum prediction taken)
3034{
3035  tree t = build1 (PREDICT_EXPR, void_type_node,
3036		   build_int_cst (integer_type_node, predictor));
3037  SET_PREDICT_EXPR_OUTCOME (t, taken);
3038  return t;
3039}
3040
3041const char *
3042predictor_name (enum br_predictor predictor)
3043{
3044  return predictor_info[predictor].name;
3045}
3046
3047/* Predict branch probabilities and estimate profile of the tree CFG. */
3048
3049namespace {
3050
3051const pass_data pass_data_profile =
3052{
3053  GIMPLE_PASS, /* type */
3054  "profile_estimate", /* name */
3055  OPTGROUP_NONE, /* optinfo_flags */
3056  TV_BRANCH_PROB, /* tv_id */
3057  PROP_cfg, /* properties_required */
3058  0, /* properties_provided */
3059  0, /* properties_destroyed */
3060  0, /* todo_flags_start */
3061  0, /* todo_flags_finish */
3062};
3063
3064class pass_profile : public gimple_opt_pass
3065{
3066public:
3067  pass_profile (gcc::context *ctxt)
3068    : gimple_opt_pass (pass_data_profile, ctxt)
3069  {}
3070
3071  /* opt_pass methods: */
3072  virtual bool gate (function *) { return flag_guess_branch_prob; }
3073  virtual unsigned int execute (function *);
3074
3075}; // class pass_profile
3076
3077unsigned int
3078pass_profile::execute (function *fun)
3079{
3080  unsigned nb_loops;
3081
3082  loop_optimizer_init (LOOPS_NORMAL);
3083  if (dump_file && (dump_flags & TDF_DETAILS))
3084    flow_loops_dump (dump_file, NULL, 0);
3085
3086  mark_irreducible_loops ();
3087
3088  nb_loops = number_of_loops (fun);
3089  if (nb_loops > 1)
3090    scev_initialize ();
3091
3092  tree_estimate_probability ();
3093
3094  if (nb_loops > 1)
3095    scev_finalize ();
3096
3097  loop_optimizer_finalize ();
3098  if (dump_file && (dump_flags & TDF_DETAILS))
3099    gimple_dump_cfg (dump_file, dump_flags);
3100  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
3101    profile_status_for_fn (fun) = PROFILE_GUESSED;
3102  return 0;
3103}
3104
3105} // anon namespace
3106
3107gimple_opt_pass *
3108make_pass_profile (gcc::context *ctxt)
3109{
3110  return new pass_profile (ctxt);
3111}
3112
3113namespace {
3114
3115const pass_data pass_data_strip_predict_hints =
3116{
3117  GIMPLE_PASS, /* type */
3118  "*strip_predict_hints", /* name */
3119  OPTGROUP_NONE, /* optinfo_flags */
3120  TV_BRANCH_PROB, /* tv_id */
3121  PROP_cfg, /* properties_required */
3122  0, /* properties_provided */
3123  0, /* properties_destroyed */
3124  0, /* todo_flags_start */
3125  0, /* todo_flags_finish */
3126};
3127
3128class pass_strip_predict_hints : public gimple_opt_pass
3129{
3130public:
3131  pass_strip_predict_hints (gcc::context *ctxt)
3132    : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3133  {}
3134
3135  /* opt_pass methods: */
3136  opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3137  virtual unsigned int execute (function *);
3138
3139}; // class pass_strip_predict_hints
3140
3141/* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
3142   we no longer need.  */
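/* For example (illustrative only), a statement

     x_1 = __builtin_expect (cond_2, 1);

   is replaced below by the plain copy `x_1 = cond_2;', and such a call
   whose result is unused is simply removed.  */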
3143unsigned int
3144pass_strip_predict_hints::execute (function *fun)
3145{
3146  basic_block bb;
3147  gimple ass_stmt;
3148  tree var;
3149
3150  FOR_EACH_BB_FN (bb, fun)
3151    {
3152      gimple_stmt_iterator bi;
3153      for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
3154	{
3155	  gimple stmt = gsi_stmt (bi);
3156
3157	  if (gimple_code (stmt) == GIMPLE_PREDICT)
3158	    {
3159	      gsi_remove (&bi, true);
3160	      continue;
3161	    }
3162	  else if (is_gimple_call (stmt))
3163	    {
3164	      tree fndecl = gimple_call_fndecl (stmt);
3165
3166	      if ((fndecl
3167		   && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3168		   && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
3169		   && gimple_call_num_args (stmt) == 2)
3170		  || (gimple_call_internal_p (stmt)
3171		      && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
3172		{
3173		  var = gimple_call_lhs (stmt);
3174		  if (var)
3175		    {
3176		      ass_stmt
3177			= gimple_build_assign (var, gimple_call_arg (stmt, 0));
3178		      gsi_replace (&bi, ass_stmt, true);
3179		    }
3180		  else
3181		    {
3182		      gsi_remove (&bi, true);
3183		      continue;
3184		    }
3185		}
3186	    }
3187	  gsi_next (&bi);
3188	}
3189    }
3190  return 0;
3191}
3192
3193} // anon namespace
3194
3195gimple_opt_pass *
3196make_pass_strip_predict_hints (gcc::context *ctxt)
3197{
3198  return new pass_strip_predict_hints (ctxt);
3199}
3200
3201/* Rebuild function frequencies.  Passes are in general expected to
3202   maintain the profile by hand; however, in some cases this is not possible:
3203   for example, when inlining several functions with loops, frequencies might
3204   run out of scale and thus need to be recomputed.  */
3205
3206void
3207rebuild_frequencies (void)
3208{
3209  timevar_push (TV_REBUILD_FREQUENCIES);
3210
3211  /* When the max bb count in the function is small, there is a higher
3212     chance that there were truncation errors in the integer scaling
3213     of counts by inlining and other optimizations. This could lead
3214     to incorrect classification of code as being cold when it isn't.
3215     In that case, force the estimation of bb counts/frequencies from the
3216     branch probabilities, rather than computing frequencies from counts,
3217     which may also lead to frequencies incorrectly reduced to 0. There
3218     is less precision in the probabilities, so we only do this for small
3219     max counts.  */
3220  gcov_type count_max = 0;
3221  basic_block bb;
3222  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3223    count_max = MAX (bb->count, count_max);
3224
3225  if (profile_status_for_fn (cfun) == PROFILE_GUESSED
3226      || (profile_status_for_fn (cfun) == PROFILE_READ && count_max < REG_BR_PROB_BASE/10))
3227    {
3228      loop_optimizer_init (0);
3229      add_noreturn_fake_exit_edges ();
3230      mark_irreducible_loops ();
3231      connect_infinite_loops_to_exit ();
3232      estimate_bb_frequencies (true);
3233      remove_fake_exit_edges ();
3234      loop_optimizer_finalize ();
3235    }
3236  else if (profile_status_for_fn (cfun) == PROFILE_READ)
3237    counts_to_freqs ();
3238  else
3239    gcc_unreachable ();
3240  timevar_pop (TV_REBUILD_FREQUENCIES);
3241}
3242