/* Branch prediction routines for the GNU compiler.
   Copyright (C) 2000-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* References:

   [1] "Branch Prediction for Free"
       Ball and Larus; PLDI '93.
   [2] "Static Branch Frequency and Program Profile Analysis"
       Wu and Larus; MICRO-27.
   [3] "Corpus-based Static Branch Prediction"
       Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */


#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "calls.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "insn-config.h"
#include "regs.h"
#include "flags.h"
#include "function.h"
#include "except.h"
#include "diagnostic-core.h"
#include "recog.h"
#include "expr.h"
#include "predict.h"
#include "coverage.h"
#include "sreal.h"
#include "params.h"
#include "target.h"
#include "cfgloop.h"
#include "pointer-set.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-pass.h"
#include "tree-scalar-evolution.h"

/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
		   1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
	     real_inv_br_prob_base, real_one_half, real_bb_freq_max;

static void combine_predictions_for_insn (rtx, basic_block);
static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
static bool can_predict_insn_p (const_rtx);

/* Information we hold about each branch predictor.
   Filled using information from predict.def.  */

struct predictor_info
{
  const char *const name;	/* Name used in the debugging dumps.  */
  const int hitrate;		/* Expected hitrate used by
				   predict_insn_def call.  */
  const int flags;
};
/* Use given predictor without Dempster-Shafer theory if it matches
   using the first_match heuristics.  */
#define PRED_FLAG_FIRST_MATCH 1

/* Convert a hitrate given in percent to our fixed-point representation.  */

#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
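/* For example, with REG_BR_PROB_BASE defined as 10000, HITRATE (75)
   yields (75 * 10000 + 50) / 100 == 7500, i.e. a 75% hitrate in
   REG_BR_PROB_BASE units; the + 50 term rounds to the nearest unit
   instead of truncating.  */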

#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
static const struct predictor_info predictor_info[] = {
#include "predict.def"

  /* Upper bound on predictors.  */
  {NULL, 0, 0}
};
#undef DEF_PREDICTOR

/* Return TRUE if frequency FREQ is considered to be hot.  */

static inline bool
maybe_hot_frequency_p (struct function *fun, int freq)
{
  struct cgraph_node *node = cgraph_get_node (fun->decl);
  if (!profile_info || !flag_branch_probabilities)
    {
      if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
        return false;
      if (node->frequency == NODE_FREQUENCY_HOT)
        return true;
    }
  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
    return true;
  if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
      && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
    return false;
  if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
    return false;
  if (freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency
	      / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
    return false;
  return true;
}

static gcov_type min_count = -1;

/* Determine the threshold for hot BB counts.  */

gcov_type
get_hot_bb_threshold ()
{
  gcov_working_set_t *ws;
  if (min_count == -1)
    {
      ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
      gcc_assert (ws);
      min_count = ws->min_counter;
    }
  return min_count;
}
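
/* As an illustration, assuming the documented default of 999 permille
   for --param hot-bb-count-ws-permille (see params.def), the threshold
   becomes the smallest profile counter among the blocks that together
   account for 99.9% of all counted executions, so only the coldest
   0.1% of execution volume falls below it.  */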

/* Set the threshold for hot BB counts.  */

void
set_hot_bb_threshold (gcov_type min)
{
  min_count = min;
}

/* Return TRUE if profile count COUNT is considered to be hot.  */

static inline bool
maybe_hot_count_p (struct function *fun, gcov_type count)
{
  if (fun && profile_status_for_fn (fun) != PROFILE_READ)
    return true;
  /* Code executed at most once is not hot.  */
  if (profile_info->runs >= count)
    return false;
  return (count >= get_hot_bb_threshold ());
}

/* Return true in case BB can be CPU intensive and should be optimized
   for maximal performance.  */

bool
maybe_hot_bb_p (struct function *fun, const_basic_block bb)
{
  gcc_checking_assert (fun);
  if (profile_status_for_fn (fun) == PROFILE_READ)
    return maybe_hot_count_p (fun, bb->count);
  return maybe_hot_frequency_p (fun, bb->frequency);
}

/* Return true if the call can be hot.  */

bool
cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
{
  if (profile_info && flag_branch_probabilities
      && !maybe_hot_count_p (NULL, edge->count))
    return false;
  if (edge->caller->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED
      || (edge->callee
	  && edge->callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
    return false;
  if (edge->caller->frequency > NODE_FREQUENCY_UNLIKELY_EXECUTED
      && (edge->callee
	  && edge->callee->frequency <= NODE_FREQUENCY_EXECUTED_ONCE))
    return false;
  if (optimize_size)
    return false;
  if (edge->caller->frequency == NODE_FREQUENCY_HOT)
    return true;
  if (edge->caller->frequency == NODE_FREQUENCY_EXECUTED_ONCE
      && edge->frequency < CGRAPH_FREQ_BASE * 3 / 2)
    return false;
  if (flag_guess_branch_prob)
    {
      if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0
	  || edge->frequency <= (CGRAPH_FREQ_BASE
				 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
        return false;
    }
  return true;
}

/* Return true in case edge E can be CPU intensive and should be
   optimized for maximal performance.  */

bool
maybe_hot_edge_p (edge e)
{
  if (profile_status_for_fn (cfun) == PROFILE_READ)
    return maybe_hot_count_p (cfun, e->count);
  return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
}



/* Return true if profile COUNT and FREQUENCY, or function FUN's static
   node frequency, reflect never being executed.  */

static bool
probably_never_executed (struct function *fun,
                         gcov_type count, int frequency)
{
  gcc_checking_assert (fun);
  if (profile_status_for_fn (cfun) == PROFILE_READ)
    {
      int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
      if (count * unlikely_count_fraction >= profile_info->runs)
	return false;
      if (!frequency)
	return true;
      if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency)
	return false;
      if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
	{
	  gcov_type computed_count;
	  /* Check for possibility of overflow, in which case entry bb count
	     is large enough to do the division first without losing much
	     precision.  */
	  if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
	      < REG_BR_PROB_BASE * REG_BR_PROB_BASE)
	    {
	      gcov_type scaled_count
		= frequency * ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
		  * unlikely_count_fraction;
	      computed_count = RDIV (scaled_count,
				     ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency);
	    }
	  else
	    {
	      computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count,
				     ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency);
	      computed_count *= frequency * unlikely_count_fraction;
	    }
	  if (computed_count >= profile_info->runs)
	    return false;
	}
      return true;
    }
  if ((!profile_info || !flag_branch_probabilities)
      && (cgraph_get_node (fun->decl)->frequency
	  == NODE_FREQUENCY_UNLIKELY_EXECUTED))
    return true;
  return false;
}


/* Return true in case BB is probably never executed.  */

bool
probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
{
  return probably_never_executed (fun, bb->count, bb->frequency);
}


/* Return true in case edge E is probably never executed.  */

bool
probably_never_executed_edge_p (struct function *fun, edge e)
{
  return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
}

/* Return true if NODE should be optimized for size.  */

bool
cgraph_optimize_for_size_p (struct cgraph_node *node)
{
  if (optimize_size)
    return true;
  if (node && (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
    return true;
  else
    return false;
}

/* Return true when current function should always be optimized for size.  */

bool
optimize_function_for_size_p (struct function *fun)
{
  if (optimize_size)
    return true;
  if (!fun || !fun->decl)
    return false;
  return cgraph_optimize_for_size_p (cgraph_get_node (fun->decl));
}

/* Return true when current function should always be optimized for speed.  */

bool
optimize_function_for_speed_p (struct function *fun)
{
  return !optimize_function_for_size_p (fun);
}

/* Return TRUE when BB should be optimized for size.  */

bool
optimize_bb_for_size_p (const_basic_block bb)
{
  return optimize_function_for_size_p (cfun) || !maybe_hot_bb_p (cfun, bb);
}

/* Return TRUE when BB should be optimized for speed.  */

bool
optimize_bb_for_speed_p (const_basic_block bb)
{
  return !optimize_bb_for_size_p (bb);
}

/* Return TRUE when edge E should be optimized for size.  */

bool
optimize_edge_for_size_p (edge e)
{
  return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
}

/* Return TRUE when edge E should be optimized for speed.  */

bool
optimize_edge_for_speed_p (edge e)
{
  return !optimize_edge_for_size_p (e);
}

/* Return TRUE when the insns currently being expanded should be
   optimized for size.  */

bool
optimize_insn_for_size_p (void)
{
  return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
}

/* Return TRUE when the insns currently being expanded should be
   optimized for speed.  */

bool
optimize_insn_for_speed_p (void)
{
  return !optimize_insn_for_size_p ();
}

/* Return TRUE when LOOP should be optimized for size.  */

bool
optimize_loop_for_size_p (struct loop *loop)
{
  return optimize_bb_for_size_p (loop->header);
}

/* Return TRUE when LOOP should be optimized for speed.  */

bool
optimize_loop_for_speed_p (struct loop *loop)
{
  return optimize_bb_for_speed_p (loop->header);
}

/* Return TRUE when LOOP nest should be optimized for speed.  */

bool
optimize_loop_nest_for_speed_p (struct loop *loop)
{
  struct loop *l = loop;
  if (optimize_loop_for_speed_p (loop))
    return true;
  l = loop->inner;
  while (l && l != loop)
    {
      if (optimize_loop_for_speed_p (l))
        return true;
      if (l->inner)
        l = l->inner;
      else if (l->next)
        l = l->next;
      else
        {
	  while (l != loop && !l->next)
	    l = loop_outer (l);
	  if (l != loop)
	    l = l->next;
	}
    }
  return false;
}

/* Return TRUE when LOOP nest should be optimized for size.  */

bool
optimize_loop_nest_for_size_p (struct loop *loop)
{
  return !optimize_loop_nest_for_speed_p (loop);
}

/* Return true when edge E is likely to be well predictable by the
   branch predictor.  */

bool
predictable_edge_p (edge e)
{
  if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
    return false;
  if ((e->probability
       <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
      || (REG_BR_PROB_BASE - e->probability
          <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
    return true;
  return false;
}
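
/* For instance, assuming the documented default of 2 for
   --param predictable-branch-outcome (see params.def), an edge counts
   as predictable when its estimated probability is at most 2% or at
   least 98%, i.e. when the branch goes the same way nearly always.  */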


/* Set RTL expansion for BB profile.  */

void
rtl_profile_for_bb (basic_block bb)
{
  crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
}

/* Set RTL expansion for edge profile.  */

void
rtl_profile_for_edge (edge e)
{
  crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
}

/* Set RTL expansion to default mode (i.e. when profile info is not known).  */
void
default_rtl_profile (void)
{
  crtl->maybe_hot_insn_p = true;
}

/* Return true if one of the outgoing edges of BB is already predicted
   by PREDICTOR.  */

bool
rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  rtx note;
  if (!INSN_P (BB_END (bb)))
    return false;
  for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED
	&& INTVAL (XEXP (XEXP (note, 0), 0)) == (int) predictor)
      return true;
  return false;
}

/* This map contains for a basic block the list of predictions for the
   outgoing edges.  */

static struct pointer_map_t *bb_predictions;

/* Structure representing predictions at tree level.  */

struct edge_prediction {
    struct edge_prediction *ep_next;
    edge ep_edge;
    enum br_predictor ep_predictor;
    int ep_probability;
};

/* Return true if one of the outgoing edges of BB is already predicted
   by PREDICTOR.  */

bool
gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  struct edge_prediction *i;
  void **preds = pointer_map_contains (bb_predictions, bb);

  if (!preds)
    return false;

  for (i = (struct edge_prediction *) *preds; i; i = i->ep_next)
    if (i->ep_predictor == predictor)
      return true;
  return false;
}

/* Return true when the probability of edge E is reliable.

   The profile guessing code is good at predicting the branch outcome
   (i.e. taken/not taken), which it gets right slightly over 75% of the
   time.  It is however notoriously poor at predicting the probability
   itself: in general the guessed profile appears a lot flatter (with
   probabilities closer to 50%) than the reality, so it is a bad idea to
   use it to drive optimizations such as those disabling dynamic branch
   prediction for well predictable branches.

   There are two exceptions - edges leading to noreturn calls and edges
   predicted by the number-of-iterations heuristics are predicted well.
   This predicate should be able to distinguish those, but at the moment
   it simply checks for the noreturn heuristic, which is the only one
   giving a probability over 99% or below 1%.  In the future we might
   want to propagate reliability information across the CFG if we find
   this information useful in multiple places.  */
static bool
probability_reliable_p (int prob)
{
  return (profile_status_for_fn (cfun) == PROFILE_READ
	  || (profile_status_for_fn (cfun) == PROFILE_GUESSED
	      && (prob <= HITRATE (1) || prob >= HITRATE (99))));
}
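
/* Concretely, with REG_BR_PROB_BASE == 10000 a guessed probability is
   trusted only when it is at most HITRATE (1) == 100 (1%) or at least
   HITRATE (99) == 9900 (99%); probabilities read from profile feedback
   are always trusted.  */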

/* Same predicate as above, working on edges.  */
bool
edge_probability_reliable_p (const_edge e)
{
  return probability_reliable_p (e->probability);
}

/* Same predicate as edge_probability_reliable_p, working on notes.  */
bool
br_prob_note_reliable_p (const_rtx note)
{
  gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
  return probability_reliable_p (XINT (note, 0));
}

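/* Attach a REG_BR_PRED note to conditional jump INSN, recording that
   PREDICTOR predicts it taken with PROBABILITY (in REG_BR_PROB_BASE
   units).  This is a no-op unless guessing branch probabilities is
   enabled.  */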
static void
predict_insn (rtx insn, enum br_predictor predictor, int probability)
{
  gcc_assert (any_condjump_p (insn));
  if (!flag_guess_branch_prob)
    return;

  add_reg_note (insn, REG_BR_PRED,
		gen_rtx_CONCAT (VOIDmode,
				GEN_INT ((int) predictor),
				GEN_INT ((int) probability)));
}

/* Predict insn by given predictor.  */

void
predict_insn_def (rtx insn, enum br_predictor predictor,
		  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (insn, predictor, probability);
}

/* Predict edge E with given probability if possible.  */

void
rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  rtx last_insn;
  last_insn = BB_END (e->src);

  /* We can store the branch prediction information only about
     conditional jumps.  */
  if (!any_condjump_p (last_insn))
    return;

  /* We always store probability of branching.  */
  if (e->flags & EDGE_FALLTHRU)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (last_insn, predictor, probability);
}

/* Predict edge E with the given PROBABILITY.  */
void
gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  gcc_assert (profile_status_for_fn (cfun) != PROFILE_GUESSED);
  if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
      && EDGE_COUNT (e->src->succs) > 1
      && flag_guess_branch_prob
      && optimize)
    {
      struct edge_prediction *i = XNEW (struct edge_prediction);
      void **preds = pointer_map_insert (bb_predictions, e->src);

      i->ep_next = (struct edge_prediction *) *preds;
      *preds = i;
      i->ep_probability = probability;
      i->ep_predictor = predictor;
      i->ep_edge = e;
    }
}

/* Remove all predictions on given basic block that are attached
   to edge E.  */
void
remove_predictions_associated_with_edge (edge e)
{
  void **preds;

  if (!bb_predictions)
    return;

  preds = pointer_map_contains (bb_predictions, e->src);

  if (preds)
    {
      struct edge_prediction **prediction = (struct edge_prediction **) preds;
      struct edge_prediction *next;

      while (*prediction)
	{
	  if ((*prediction)->ep_edge == e)
	    {
	      next = (*prediction)->ep_next;
	      free (*prediction);
	      *prediction = next;
	    }
	  else
	    prediction = &((*prediction)->ep_next);
	}
    }
}

/* Clears the list of predictions stored for BB.  */

static void
clear_bb_predictions (basic_block bb)
{
  void **preds = pointer_map_contains (bb_predictions, bb);
  struct edge_prediction *pred, *next;

  if (!preds)
    return;

  for (pred = (struct edge_prediction *) *preds; pred; pred = next)
    {
      next = pred->ep_next;
      free (pred);
    }
  *preds = NULL;
}

/* Return true when we can store a prediction on insn INSN.
   At the moment we represent predictions only on conditional
   jumps, not on computed jumps or other complicated cases.  */
static bool
can_predict_insn_p (const_rtx insn)
{
  return (JUMP_P (insn)
	  && any_condjump_p (insn)
	  && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
}

/* Predict edge E by given predictor if possible.  */

void
predict_edge_def (edge e, enum br_predictor predictor,
		  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_edge (e, predictor, probability);
}

/* Invert all branch predictions or probability notes in the INSN.  This needs
   to be done each time we invert the condition used by the jump.  */

void
invert_br_probabilities (rtx insn)
{
  rtx note;

  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PROB)
      XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
    else if (REG_NOTE_KIND (note) == REG_BR_PRED)
      XEXP (XEXP (note, 0), 1)
	= GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
}

/* Dump information about the branch prediction to the output file.  */

static void
dump_prediction (FILE *file, enum br_predictor predictor, int probability,
		 basic_block bb, int used)
{
  edge e;
  edge_iterator ei;

  if (!file)
    return;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (! (e->flags & EDGE_FALLTHRU))
      break;

  fprintf (file, "  %s heuristics%s: %.1f%%",
	   predictor_info[predictor].name,
	   used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);

  if (bb->count)
    {
      fprintf (file, "  exec ");
      fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
      if (e)
	{
	  fprintf (file, " hit ");
	  fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count);
	  fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
	}
    }

  fprintf (file, "\n");
}

/* We cannot predict the probabilities of the outgoing edges of BB.  Set them
   evenly and hope for the best.  */
static void
set_even_probabilities (basic_block bb)
{
  int nedges = 0;
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      nedges++;
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
    else
      e->probability = 0;
}
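
/* For example, a block with three ordinary successors gets
   (10000 + 1) / 3 == 3333 on each of them (REG_BR_PROB_BASE being
   10000); the nedges / 2 term merely rounds the division.  */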

/* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
   note if not already present.  Remove now useless REG_BR_PRED notes.  */

static void
combine_predictions_for_insn (rtx insn, basic_block bb)
{
  rtx prob_note;
  rtx *pnote;
  rtx note;
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;

  if (!can_predict_insn_p (insn))
    {
      set_even_probabilities (bb);
      return;
    }

  prob_note = find_reg_note (insn, REG_BR_PROB, 0);
  pnote = &REG_NOTES (insn);
  if (dump_file)
    fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
	     bb->index);

  /* We implement "first match" heuristics and use probability guessed
     by predictor with smallest index.  */
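
  /* The loop below combines the predictions pairwise: given the current
     combined probability c and a new prediction p (both expressed in
     REG_BR_PROB_BASE units), the Dempster-Shafer combination is
     c*p / (c*p + (1-c)*(1-p)).  E.g. combining two independent 75%
     predictions gives 0.5625 / (0.5625 + 0.0625) = 90%, while combining
     75% with an uninformative 50% leaves the result at 75%.  */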
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED)
      {
	enum br_predictor predictor = ((enum br_predictor)
				       INTVAL (XEXP (XEXP (note, 0), 0)));
	int probability = INTVAL (XEXP (XEXP (note, 0), 1));

	found = true;
	if (best_predictor > predictor)
	  best_probability = probability, best_predictor = predictor;

	d = (combined_probability * probability
	     + (REG_BR_PROB_BASE - combined_probability)
	     * (REG_BR_PROB_BASE - probability));

	/* Use FP math to avoid overflows of 32-bit integers.  */
	if (d == 0)
	  /* If one probability is 0% and one 100%, avoid division by zero.  */
	  combined_probability = REG_BR_PROB_BASE / 2;
	else
	  combined_probability = (((double) combined_probability) * probability
				  * REG_BR_PROB_BASE / d + 0.5);
      }

  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION,
		     combined_probability, bb, true);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
		       bb, !first_match);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
		       bb, first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);

  while (*pnote)
    {
      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
	{
	  enum br_predictor predictor = ((enum br_predictor)
					 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));

	  dump_prediction (dump_file, predictor, probability, bb,
			   !first_match || best_predictor == predictor);
	  *pnote = XEXP (*pnote, 1);
	}
      else
	pnote = &XEXP (*pnote, 1);
    }

  if (!prob_note)
    {
      add_int_reg_note (insn, REG_BR_PROB, combined_probability);

      /* Save the prediction into CFG in case we are seeing a non-degenerate
	 conditional jump.  */
      if (!single_succ_p (bb))
	{
	  BRANCH_EDGE (bb)->probability = combined_probability;
	  FALLTHRU_EDGE (bb)->probability
	    = REG_BR_PROB_BASE - combined_probability;
	}
    }
  else if (!single_succ_p (bb))
    {
      int prob = XINT (prob_note, 0);

      BRANCH_EDGE (bb)->probability = prob;
      FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
    }
  else
    single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
}

/* Combine predictions into single probability and store them into CFG.
   Remove now useless prediction entries.  */

static void
combine_predictions_for_bb (basic_block bb)
{
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;
  struct edge_prediction *pred;
  int nedges = 0;
  edge e, first = NULL, second = NULL;
  edge_iterator ei;
  void **preds;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      {
	nedges++;
	if (first && !second)
	  second = e;
	if (!first)
	  first = e;
      }

  /* When there is no successor or only one choice, prediction is easy.

     We are lazy for now and predict only basic blocks with two outgoing
     edges.  It is possible to predict the generic case too, but we have to
     ignore first match heuristics and do more involved combining.  Implement
     this later.  */
  if (nedges != 2)
    {
      if (!bb->count)
	set_even_probabilities (bb);
      clear_bb_predictions (bb);
      if (dump_file)
	fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
		 nedges, bb->index);
      return;
    }

  if (dump_file)
    fprintf (dump_file, "Predictions for bb %i\n", bb->index);

  preds = pointer_map_contains (bb_predictions, bb);
  if (preds)
    {
      /* We implement "first match" heuristics and use probability guessed
	 by predictor with smallest index.  */
      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
	{
	  enum br_predictor predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  if (pred->ep_edge != first)
	    probability = REG_BR_PROB_BASE - probability;

	  found = true;
	  /* First match heuristics would be wildly confused if we predicted
	     both directions.  */
	  if (best_predictor > predictor)
	    {
	      struct edge_prediction *pred2;
	      int prob = probability;

	      for (pred2 = (struct edge_prediction *) *preds;
		   pred2; pred2 = pred2->ep_next)
		if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
		  {
		    int probability2 = pred2->ep_probability;

		    if (pred2->ep_edge != first)
		      probability2 = REG_BR_PROB_BASE - probability2;

		    if ((probability < REG_BR_PROB_BASE / 2)
			!= (probability2 < REG_BR_PROB_BASE / 2))
		      break;

		    /* If the same predictor later gave better result, go for it! */
		    if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
			|| (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
		      prob = probability2;
		  }
	      if (!pred2)
		best_probability = prob, best_predictor = predictor;
	    }

	  d = (combined_probability * probability
	       + (REG_BR_PROB_BASE - combined_probability)
	       * (REG_BR_PROB_BASE - probability));

	  /* Use FP math to avoid overflows of 32-bit integers.  */
	  if (d == 0)
	    /* If one probability is 0% and one 100%, avoid division by zero.  */
	    combined_probability = REG_BR_PROB_BASE / 2;
	  else
	    combined_probability = (((double) combined_probability)
				    * probability
				    * REG_BR_PROB_BASE / d + 0.5);
	}
    }

  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
		       !first_match);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
		       first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);

  if (preds)
    {
      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
	{
	  enum br_predictor predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  if (pred->ep_edge != EDGE_SUCC (bb, 0))
	    probability = REG_BR_PROB_BASE - probability;
	  dump_prediction (dump_file, predictor, probability, bb,
			   !first_match || best_predictor == predictor);
	}
    }
  clear_bb_predictions (bb);

  if (!bb->count)
    {
      first->probability = combined_probability;
      second->probability = REG_BR_PROB_BASE - combined_probability;
    }
}

/* Check if T1 and T2 satisfy the IV_COMPARE condition.
   Return the SSA_NAME if the condition is satisfied, NULL otherwise.

   T1 and T2 should be one of the following cases:
     1. T1 is SSA_NAME, T2 is NULL
     2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
     3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */

static tree
strips_small_constant (tree t1, tree t2)
{
  tree ret = NULL;
  int value = 0;

  if (!t1)
    return NULL;
  else if (TREE_CODE (t1) == SSA_NAME)
    ret = t1;
  else if (tree_fits_shwi_p (t1))
    value = tree_to_shwi (t1);
  else
    return NULL;

  if (!t2)
    return ret;
  else if (tree_fits_shwi_p (t2))
    value = tree_to_shwi (t2);
  else if (TREE_CODE (t2) == SSA_NAME)
    {
      if (ret)
        return NULL;
      else
        ret = t2;
    }

  if (value <= 4 && value >= -4)
    return ret;
  else
    return NULL;
}
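
/* For instance, given the operands of "i_5 + 2" this returns i_5, while
   for "i_5 + 8" it returns NULL because the constant falls outside
   [-4, 4] (i_5 and the constants here are purely illustrative).  */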

/* Return the SSA_NAME in T or T's operands.
   Return NULL if SSA_NAME cannot be found.  */

static tree
get_base_value (tree t)
{
  if (TREE_CODE (t) == SSA_NAME)
    return t;

  if (!BINARY_CLASS_P (t))
    return NULL;

  switch (TREE_OPERAND_LENGTH (t))
    {
    case 1:
      return strips_small_constant (TREE_OPERAND (t, 0), NULL);
    case 2:
      return strips_small_constant (TREE_OPERAND (t, 0),
				    TREE_OPERAND (t, 1));
    default:
      return NULL;
    }
}

/* Check the compare STMT in LOOP.  If it compares an induction
   variable to a loop invariant, return true, and save
   LOOP_INVARIANT, COMPARE_CODE, LOOP_STEP and LOOP_IV_BASE.
   Otherwise return false and set LOOP_INVARIANT to NULL.  */

static bool
is_comparison_with_loop_invariant_p (gimple stmt, struct loop *loop,
				     tree *loop_invariant,
				     enum tree_code *compare_code,
				     tree *loop_step,
				     tree *loop_iv_base)
{
  tree op0, op1, bound, base;
  affine_iv iv0, iv1;
  enum tree_code code;
  tree step;

  code = gimple_cond_code (stmt);
  *loop_invariant = NULL;

  switch (code)
    {
    case GT_EXPR:
    case GE_EXPR:
    case NE_EXPR:
    case LT_EXPR:
    case LE_EXPR:
    case EQ_EXPR:
      break;

    default:
      return false;
    }

  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);

  if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
       || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
    return false;
  if (TREE_CODE (iv0.step) != INTEGER_CST
      || TREE_CODE (iv1.step) != INTEGER_CST)
    return false;
  if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
      || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
    return false;

  if (integer_zerop (iv0.step))
    {
      if (code != NE_EXPR && code != EQ_EXPR)
	code = invert_tree_comparison (code, false);
      bound = iv0.base;
      base = iv1.base;
      if (tree_fits_shwi_p (iv1.step))
	step = iv1.step;
      else
	return false;
    }
  else
    {
      bound = iv1.base;
      base = iv0.base;
      if (tree_fits_shwi_p (iv0.step))
	step = iv0.step;
      else
	return false;
    }

  if (TREE_CODE (bound) != INTEGER_CST)
    bound = get_base_value (bound);
  if (!bound)
    return false;
  if (TREE_CODE (base) != INTEGER_CST)
    base = get_base_value (base);
  if (!base)
    return false;

  *loop_invariant = bound;
  *compare_code = code;
  *loop_step = step;
  *loop_iv_base = base;
  return true;
}
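
/* As a sketch of the intended use: for a loop guarded by "i < n" where
   i starts at 0 and is incremented by 1, this returns true with
   LOOP_INVARIANT == n, COMPARE_CODE == LT_EXPR, LOOP_STEP == 1 and
   LOOP_IV_BASE == 0 (assuming n is invariant in the loop).  */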

/* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */

static bool
expr_coherent_p (tree t1, tree t2)
{
  gimple stmt;
  tree ssa_name_1 = NULL;
  tree ssa_name_2 = NULL;

  gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
  gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);

  if (t1 == t2)
    return true;

  if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
    return true;
  if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
    return false;

  /* Check to see if t1 is expressed/defined with t2.  */
  stmt = SSA_NAME_DEF_STMT (t1);
  gcc_assert (stmt != NULL);
  if (is_gimple_assign (stmt))
    {
      ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
      if (ssa_name_1 && ssa_name_1 == t2)
	return true;
    }

  /* Check to see if t2 is expressed/defined with t1.  */
  stmt = SSA_NAME_DEF_STMT (t2);
  gcc_assert (stmt != NULL);
  if (is_gimple_assign (stmt))
    {
      ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
      if (ssa_name_2 && ssa_name_2 == t1)
	return true;
    }

  /* Compare if t1 and t2's def_stmts are identical.  */
  if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
    return true;
  else
    return false;
}

/* Predict branch probability of BB when BB contains a branch that compares
   an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR.  The
   loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.

   E.g.
     for (int i = 0; i < bound; i++) {
       if (i < bound - 2)
	 computation_1();
       else
	 computation_2();
     }

  In this loop, we will predict the branch inside the loop to be taken.  */
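
/* In the example above the inner condition holds in all but the last
   two of the bound iterations, so when bound, base and step are all
   known constants the code below computes the taken probability exactly
   (here (bound - 2) / bound); otherwise it falls back to coarse
   taken/not-taken guesses.  */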

static void
predict_iv_comparison (struct loop *loop, basic_block bb,
		       tree loop_bound_var,
		       tree loop_iv_base_var,
		       enum tree_code loop_bound_code,
		       int loop_bound_step)
{
  gimple stmt;
  tree compare_var, compare_base;
  enum tree_code compare_code;
  tree compare_step_var;
  edge then_edge;
  edge_iterator ei;

  if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
      || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
      || predicted_by_p (bb, PRED_LOOP_EXIT))
    return;

  stmt = last_stmt (bb);
  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
    return;
  if (!is_comparison_with_loop_invariant_p (stmt, loop, &compare_var,
					    &compare_code,
					    &compare_step_var,
					    &compare_base))
    return;

  /* Find the taken edge.  */
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;

  /* When comparing an IV to a loop invariant, NE is more likely to be
     taken while EQ is more likely to be not-taken.  */
  if (compare_code == NE_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      return;
    }
  else if (compare_code == EQ_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
      return;
    }

  if (!expr_coherent_p (loop_iv_base_var, compare_base))
    return;

  /* If loop bound, base and compare bound are all constants, we can
     calculate the probability directly.  */
  if (tree_fits_shwi_p (loop_bound_var)
      && tree_fits_shwi_p (compare_var)
      && tree_fits_shwi_p (compare_base))
    {
      int probability;
      bool of, overflow = false;
      double_int mod, compare_count, tem, loop_count;

      double_int loop_bound = tree_to_double_int (loop_bound_var);
      double_int compare_bound = tree_to_double_int (compare_var);
      double_int base = tree_to_double_int (compare_base);
      double_int compare_step = tree_to_double_int (compare_step_var);

      /* (loop_bound - base) / compare_step */
      tem = loop_bound.sub_with_overflow (base, &of);
      overflow |= of;
      loop_count = tem.divmod_with_overflow (compare_step,
					     0, TRUNC_DIV_EXPR,
					     &mod, &of);
      overflow |= of;

      if ((!compare_step.is_negative ())
          ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
	{
	  /* (loop_bound - compare_bound) / compare_step */
	  tem = loop_bound.sub_with_overflow (compare_bound, &of);
	  overflow |= of;
	  compare_count = tem.divmod_with_overflow (compare_step,
						    0, TRUNC_DIV_EXPR,
						    &mod, &of);
	  overflow |= of;
	}
      else
        {
	  /* (compare_bound - base) / compare_step */
	  tem = compare_bound.sub_with_overflow (base, &of);
	  overflow |= of;
	  compare_count = tem.divmod_with_overflow (compare_step,
						    0, TRUNC_DIV_EXPR,
						    &mod, &of);
	  overflow |= of;
	}
      if (compare_code == LE_EXPR || compare_code == GE_EXPR)
	++compare_count;
      if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
	++loop_count;
      if (compare_count.is_negative ())
        compare_count = double_int_zero;
      if (loop_count.is_negative ())
        loop_count = double_int_zero;
      if (loop_count.is_zero ())
	probability = 0;
      else if (compare_count.scmp (loop_count) == 1)
	probability = REG_BR_PROB_BASE;
      else
        {
	  /* If loop_count is too big, such that REG_BR_PROB_BASE * loop_count
	     could overflow, shift both loop_count and compare_count right
	     a bit so that it doesn't overflow.  Note both counts are known not
	     to be negative at this point.  */
	  int clz_bits = clz_hwi (loop_count.high);
	  gcc_assert (REG_BR_PROB_BASE < 32768);
	  if (clz_bits < 16)
	    {
	      loop_count = loop_count.arshift (16 - clz_bits,
					       HOST_BITS_PER_DOUBLE_INT);
	      compare_count = compare_count.arshift (16 - clz_bits,
						     HOST_BITS_PER_DOUBLE_INT);
	    }
	  tem = compare_count.mul_with_sign (double_int::from_shwi
					     (REG_BR_PROB_BASE), true, &of);
	  gcc_assert (!of);
	  tem = tem.divmod (loop_count, true, TRUNC_DIV_EXPR, &mod);
	  probability = tem.to_uhwi ();
	}

      if (!overflow)
        predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);

      return;
    }

  if (expr_coherent_p (loop_bound_var, compare_var))
    {
      if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
	  && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
	       && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_code == NE_EXPR)
	{
	  /* If the loop backedge condition is "(i != bound)", we do
	     the comparison based on the step of IV:
	     * step < 0 : backedge condition is like (i > bound)
	     * step > 0 : backedge condition is like (i < bound)  */
	  gcc_assert (loop_bound_step != 0);
	  if (loop_bound_step > 0
	      && (compare_code == LT_EXPR
		  || compare_code == LE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else if (loop_bound_step < 0
		   && (compare_code == GT_EXPR
		       || compare_code == GE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
	}
      else
	/* The branch is predicted not-taken if loop_bound_code is
	   opposite to compare_code.  */
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
  else if (expr_coherent_p (loop_iv_base_var, compare_var))
    {
      /* For cases like:
	   for (i = s; i < h; i++)
	     if (i > s + 2) ....
	 The branch should be predicted taken.  */
      if (loop_bound_step > 0
	  && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_step < 0
	       && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
}

/* Predict for extra loop exits that will lead to EXIT_EDGE.  The extra loop
   exits result from short-circuit conditions that will generate an
   iftmp.  E.g.:

   if (foo() || global > 10)
     break;

   This will be translated into:

   BB3:
     loop header...
   BB4:
     if foo() goto BB6 else goto BB5
   BB5:
     if global > 10 goto BB6 else goto BB7
   BB6:
     goto BB7
   BB7:
     iftmp = (PHI 0(BB5), 1(BB6))
     if iftmp == 1 goto BB8 else goto BB3
   BB8:
     outside of the loop...

   The edge BB7->BB8 is a loop exit because BB8 is outside of the loop.
   From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
   exits.  This function takes BB7->BB8 as input, and finds out the extra loop
   exits to predict them using PRED_LOOP_EXIT.  */

static void
predict_extra_loop_exits (edge exit_edge)
{
  unsigned i;
  bool check_value_one;
  gimple phi_stmt;
  tree cmp_rhs, cmp_lhs;
  gimple cmp_stmt = last_stmt (exit_edge->src);

  if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
    return;
  cmp_rhs = gimple_cond_rhs (cmp_stmt);
  cmp_lhs = gimple_cond_lhs (cmp_stmt);
  if (!TREE_CONSTANT (cmp_rhs)
      || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
    return;
  if (TREE_CODE (cmp_lhs) != SSA_NAME)
    return;

  /* If check_value_one is true, only the phi_args with value '1' will lead
     to loop exit.  Otherwise, only the phi_args with value '0' will lead to
     loop exit.  */
  check_value_one = (((integer_onep (cmp_rhs))
		    ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
		    ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));

  phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
  if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI)
    return;

  for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
    {
      edge e1;
      edge_iterator ei;
      tree val = gimple_phi_arg_def (phi_stmt, i);
      edge e = gimple_phi_arg_edge (phi_stmt, i);

      if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
	continue;
      if ((check_value_one ^ integer_onep (val)) == 1)
	continue;
      if (EDGE_COUNT (e->src->succs) != 1)
	{
	  predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
	  continue;
	}

      FOR_EACH_EDGE (e1, ei, e->src->preds)
	predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
    }
}

/* Predict edge probabilities by exploiting loop structure.  */

static void
predict_loops (void)
{
  struct loop *loop;

  /* Walk all the natural loops, predicting their exit edges and the
     branches that depend on their induction variables.  */
  FOR_EACH_LOOP (loop, 0)
    {
      basic_block bb, *bbs;
      unsigned j, n_exits;
      vec<edge> exits;
      struct tree_niter_desc niter_desc;
      edge ex;
      struct nb_iter_bound *nb_iter;
      enum tree_code loop_bound_code = ERROR_MARK;
      tree loop_bound_step = NULL;
      tree loop_bound_var = NULL;
      tree loop_iv_base = NULL;
      gimple stmt = NULL;

      exits = get_loop_exit_edges (loop);
      n_exits = exits.length ();
      if (!n_exits)
	{
          exits.release ();
	  continue;
	}

      FOR_EACH_VEC_ELT (exits, j, ex)
	{
	  tree niter = NULL;
	  HOST_WIDE_INT nitercst;
	  int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
	  int probability;
	  enum br_predictor predictor;

	  predict_extra_loop_exits (ex);

	  if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
	    niter = niter_desc.niter;
	  if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
	    niter = loop_niter_by_eval (loop, ex);

	  if (TREE_CODE (niter) == INTEGER_CST)
	    {
	      if (tree_fits_uhwi_p (niter)
		  && max
		  && compare_tree_int (niter, max - 1) == -1)
		nitercst = tree_to_uhwi (niter) + 1;
	      else
		nitercst = max;
	      predictor = PRED_LOOP_ITERATIONS;
	    }
	  /* If we have just one exit and we can derive some information about
	     the number of iterations of the loop from the statements inside
	     the loop, use it to predict this exit.  */
	  else if (n_exits == 1)
	    {
	      nitercst = estimated_stmt_executions_int (loop);
	      if (nitercst < 0)
		continue;
	      if (nitercst > max)
		nitercst = max;

	      predictor = PRED_LOOP_ITERATIONS_GUESSED;
	    }
	  else
	    continue;

	  /* If the prediction for number of iterations is zero, do not
	     predict the exit edges.  */
	  if (nitercst == 0)
	    continue;

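	  /* An exit from a loop expected to iterate nitercst times is
	     taken once per nitercst traversals, so e.g. nitercst == 10
	     yields an exit probability of REG_BR_PROB_BASE / 10, i.e.
	     roughly 10% (the nitercst / 2 term rounds the division).  */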
	  probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
	  predict_edge (ex, predictor, probability);
	}
      exits.release ();

      /* Find information about loop bound variables.  */
      for (nb_iter = loop->bounds; nb_iter;
	   nb_iter = nb_iter->next)
	if (nb_iter->stmt
	    && gimple_code (nb_iter->stmt) == GIMPLE_COND)
	  {
	    stmt = nb_iter->stmt;
	    break;
	  }
      if (!stmt && last_stmt (loop->header)
	  && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
	stmt = last_stmt (loop->header);
      if (stmt)
	is_comparison_with_loop_invariant_p (stmt, loop,
					     &loop_bound_var,
					     &loop_bound_code,
					     &loop_bound_step,
					     &loop_iv_base);

      bbs = get_loop_body (loop);

      for (j = 0; j < loop->num_nodes; j++)
	{
	  int header_found = 0;
	  edge e;
	  edge_iterator ei;

	  bb = bbs[j];

	  /* Bypass loop heuristics on continue statement.  These
	     statements construct loops via "non-loop" constructs
	     in the source language and are better handled
	     separately.  */
	  if (predicted_by_p (bb, PRED_CONTINUE))
	    continue;

	  /* Loop branch heuristics - predict an edge back to a
	     loop's head as taken.  */
	  if (bb == loop->latch)
	    {
	      e = find_edge (loop->latch, loop->header);
	      if (e)
		{
		  header_found = 1;
		  predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
		}
	    }

	  /* Loop exit heuristics - predict an edge exiting the loop if the
	     conditional has no loop header successors as not taken.  */
	  if (!header_found
	      /* If we already used more reliable loop exit predictors, do not
		 bother with PRED_LOOP_EXIT.  */
	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
	    {
	      /* For loops with many exits we don't want to predict all exits
	         with a pretty large probability, because if all exits are
		 considered in a row, the loop would be predicted to iterate
		 almost never.  The code to divide the probability by the
		 number of exits is very rough.  It should compute the number
		 of exits taken in each path through the function (not the
		 overall number of exits that might be a lot higher for loops
		 with wide switch statements in them) and compute the n-th
		 root of the probability.

		 We limit the minimal probability to 2% to avoid
		 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction,
		 as this was causing a regression in the perl benchmark
		 containing such a wide loop.  */

	      int probability = ((REG_BR_PROB_BASE
		                  - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
				 / n_exits);
	      if (probability < HITRATE (2))
		probability = HITRATE (2);
	      FOR_EACH_EDGE (e, ei, bb->succs)
		if (e->dest->index < NUM_FIXED_BLOCKS
		    || !flow_bb_inside_loop_p (loop, e->dest))
		  predict_edge (e, PRED_LOOP_EXIT, probability);
	    }
	  if (loop_bound_var)
	    predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
				   loop_bound_code,
				   tree_to_shwi (loop_bound_step));
	}

      /* Free basic blocks from get_loop_body.  */
      free (bbs);
    }
}

/* Attempt to predict probabilities of BB outgoing edges using local
   properties.  */
static void
bb_estimate_probability_locally (basic_block bb)
{
  rtx last_insn = BB_END (bb);
  rtx cond;

  if (! can_predict_insn_p (last_insn))
    return;
  cond = get_condition (last_insn, NULL, false, false);
  if (! cond)
    return;

  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (COMPARISON_P (cond)
      && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
	  || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
    {
      if (GET_CODE (cond) == EQ)
	predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
      else if (GET_CODE (cond) == NE)
	predict_insn_def (last_insn, PRED_POINTER, TAKEN);
    }
  else

  /* Try "opcode heuristic."
     EQ tests are usually false and NE tests are usually true.  Also,
     most quantities are positive, so we can make the appropriate guesses
     about signed comparisons against zero.  */
    switch (GET_CODE (cond))
      {
      case CONST_INT:
	/* Unconditional branch.  */
	predict_insn_def (last_insn, PRED_UNCONDITIONAL,
			  cond == const0_rtx ? NOT_TAKEN : TAKEN);
	break;

      case EQ:
      case UNEQ:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of equality
	   tests in FP code.  */
	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (XEXP (cond, 1) == const0_rtx
		 || XEXP (cond, 0) == const0_rtx)
	  ;
	else
	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
	break;

      case NE:
      case LTGT:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of equality
	   tests in FP code.  */
	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (XEXP (cond, 1) == const0_rtx
		 || XEXP (cond, 0) == const0_rtx)
	  ;
	else
	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
	break;

      case ORDERED:
	predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
	break;

      case UNORDERED:
	predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
	break;

      case LE:
      case LT:
	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
	    || XEXP (cond, 1) == constm1_rtx)
	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
	break;

      case GE:
      case GT:
	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
	    || XEXP (cond, 1) == constm1_rtx)
	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
	break;

      default:
	break;
      }
}

/* Set edge->probability for each successor edge of BB.  */
void
guess_outgoing_edge_probabilities (basic_block bb)
{
  bb_estimate_probability_locally (bb);
  combine_predictions_for_insn (BB_END (bb), bb);
}

static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);

/* Helper function for expr_expected_value.  */

static tree
expr_expected_value_1 (tree type, tree op0, enum tree_code code,
		       tree op1, bitmap visited, enum br_predictor *predictor)
{
  gimple def;

  if (predictor)
    *predictor = PRED_UNCONDITIONAL;

  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    {
      if (TREE_CONSTANT (op0))
	return op0;

      if (code != SSA_NAME)
	return NULL_TREE;

      def = SSA_NAME_DEF_STMT (op0);

      /* If we were already here, break the infinite cycle.  */
      if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
	return NULL;

      if (gimple_code (def) == GIMPLE_PHI)
	{
	  /* All the arguments of the PHI node must have the same constant
	     expected value.  */
	  int i, n = gimple_phi_num_args (def);
	  tree val = NULL, new_val;

	  for (i = 0; i < n; i++)
	    {
	      tree arg = PHI_ARG_DEF (def, i);
	      enum br_predictor predictor2;

	      /* If this PHI has itself as an argument, we cannot
		 determine the expected value of this argument.  However,
		 if we can find an expected constant value for the other
		 PHI args then we can still be sure that this is
		 likely a constant.  So be optimistic and just
		 continue with the next argument.  */
	      if (arg == PHI_RESULT (def))
		continue;

	      new_val = expr_expected_value (arg, visited, &predictor2);

	      /* It is difficult to combine value predictors.  Simply assume
		 that later predictor is weaker and take its prediction.  */
	      if (predictor && *predictor < predictor2)
		*predictor = predictor2;
	      if (!new_val)
		return NULL;
	      if (!val)
		val = new_val;
	      else if (!operand_equal_p (val, new_val, false))
		return NULL;
	    }
	  return val;
	}
1855      if (is_gimple_assign (def))
1856	{
1857	  if (gimple_assign_lhs (def) != op0)
1858	    return NULL;
1859
1860	  return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1861					gimple_assign_rhs1 (def),
1862					gimple_assign_rhs_code (def),
1863					gimple_assign_rhs2 (def),
1864					visited, predictor);
1865	}
1866
1867      if (is_gimple_call (def))
1868	{
1869	  tree decl = gimple_call_fndecl (def);
1870	  if (!decl)
1871	    {
1872	      if (gimple_call_internal_p (def)
1873		  && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
1874		{
1875		  gcc_assert (gimple_call_num_args (def) == 3);
1876		  tree val = gimple_call_arg (def, 0);
1877		  if (TREE_CONSTANT (val))
1878		    return val;
1879		  if (predictor)
1880		    {
1881		      *predictor = PRED_BUILTIN_EXPECT;
1882		      tree val2 = gimple_call_arg (def, 2);
1883		      gcc_assert (TREE_CODE (val2) == INTEGER_CST
1884				  && tree_fits_uhwi_p (val2)
1885				  && tree_to_uhwi (val2) < END_PREDICTORS);
1886		      *predictor = (enum br_predictor) tree_to_uhwi (val2);
1887		    }
1888		  return gimple_call_arg (def, 1);
1889		}
1890	      return NULL;
1891	    }
1892	  if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
1893	    switch (DECL_FUNCTION_CODE (decl))
1894	      {
1895	      case BUILT_IN_EXPECT:
1896		{
1897		  tree val;
1898		  if (gimple_call_num_args (def) != 2)
1899		    return NULL;
1900		  val = gimple_call_arg (def, 0);
1901		  if (TREE_CONSTANT (val))
1902		    return val;
1903		  if (predictor)
1904		    *predictor = PRED_BUILTIN_EXPECT;
1905		  return gimple_call_arg (def, 1);
1906		}
1907
1908	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
1909	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
1910	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
1911	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
1912	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
1913	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
1914	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
1915	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
1916	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
1917	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
1918	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
1919	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
1920	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
1921		/* Assume that any given atomic operation has low contention,
1922		   and thus the compare-and-swap operation succeeds.  */
1923		if (predictor)
1924		  *predictor = PRED_COMPARE_AND_SWAP;
1925		return boolean_true_node;
1926	    }
1927	}
1928
1929      return NULL;
1930    }
1931
1932  if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1933    {
1934      tree res;
1935      enum br_predictor predictor2;
1936      op0 = expr_expected_value (op0, visited, predictor);
1937      if (!op0)
1938	return NULL;
1939      op1 = expr_expected_value (op1, visited, &predictor2);
1940      if (predictor && *predictor < predictor2)
1941	*predictor = predictor2;
1942      if (!op1)
1943	return NULL;
1944      res = fold_build2 (code, type, op0, op1);
1945      if (TREE_CONSTANT (res))
1946	return res;
1947      return NULL;
1948    }
1949  if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1950    {
1951      tree res;
1952      op0 = expr_expected_value (op0, visited, predictor);
1953      if (!op0)
1954	return NULL;
1955      res = fold_build1 (code, type, op0);
1956      if (TREE_CONSTANT (res))
1957	return res;
1958      return NULL;
1959    }
1960  return NULL;
1961}
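
/* A sketch of how the walk above behaves, on a hypothetical fragment:

     long t = __builtin_expect (x > 0, 1);
     if (t)
       ...

   expr_expected_value_1 follows the SSA definition of T to the
   BUILT_IN_EXPECT call, returns the constant second argument (here 1)
   and sets *PREDICTOR to PRED_BUILTIN_EXPECT, so the condition guarding
   the branch can later be folded to a known expected outcome.  */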

/* Return the constant EXPR is likely to have at execution time, or NULL
   if unknown.  The function is used by the builtin_expect branch
   predictor, so the evidence must come from that construct plus possible
   constant folding.

   We may want to implement a more involved value guess (such as value
   range propagation based prediction), but such tricks shall go to a new
   implementation.  */

static tree
expr_expected_value (tree expr, bitmap visited,
		     enum br_predictor *predictor)
{
  enum tree_code code;
  tree op0, op1;

  if (TREE_CONSTANT (expr))
    {
      if (predictor)
	*predictor = PRED_UNCONDITIONAL;
      return expr;
    }

  extract_ops_from_tree (expr, &code, &op0, &op1);
  return expr_expected_value_1 (TREE_TYPE (expr),
				op0, code, op1, visited, predictor);
}

/* Predict using the opcode of the last statement in basic block BB.  */
static void
tree_predict_by_opcode (basic_block bb)
{
  gimple stmt = last_stmt (bb);
  edge then_edge;
  tree op0, op1;
  tree type;
  tree val;
  enum tree_code cmp;
  bitmap visited;
  edge_iterator ei;
  enum br_predictor predictor;

  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
    return;
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;
  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);
  cmp = gimple_cond_code (stmt);
  type = TREE_TYPE (op0);
  visited = BITMAP_ALLOC (NULL);
  val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,
			       &predictor);
  BITMAP_FREE (visited);
  if (val && TREE_CODE (val) == INTEGER_CST)
    {
      if (predictor == PRED_BUILTIN_EXPECT)
	{
	  int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);

	  gcc_assert (percent >= 0 && percent <= 100);
	  if (integer_zerop (val))
	    percent = 100 - percent;
	  predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
	}
      else
	predict_edge (then_edge, predictor,
		      integer_zerop (val) ? NOT_TAKEN : TAKEN);
    }
  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (POINTER_TYPE_P (type))
    {
      if (cmp == EQ_EXPR)
	predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
      else if (cmp == NE_EXPR)
	predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
    }
  else

  /* Try "opcode heuristic."
     EQ tests are usually false and NE tests are usually true.  Also,
     most quantities are positive, so we can make the appropriate guesses
     about signed comparisons against zero.  */
    switch (cmp)
      {
      case EQ_EXPR:
      case UNEQ_EXPR:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of equality
	   tests in FP code.  */
	if (FLOAT_TYPE_P (type))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (integer_zerop (op0) || integer_zerop (op1))
	  ;
	else
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
	break;

      case NE_EXPR:
      case LTGT_EXPR:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of equality
	   tests in FP code.  */
	if (FLOAT_TYPE_P (type))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (integer_zerop (op0)
		 || integer_zerop (op1))
	  ;
	else
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
	break;

      case ORDERED_EXPR:
	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
	break;

      case UNORDERED_EXPR:
	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
	break;

      case LE_EXPR:
      case LT_EXPR:
	if (integer_zerop (op1)
	    || integer_onep (op1)
	    || integer_all_onesp (op1)
	    || real_zerop (op1)
	    || real_onep (op1)
	    || real_minus_onep (op1))
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
	break;

      case GE_EXPR:
      case GT_EXPR:
	if (integer_zerop (op1)
	    || integer_onep (op1)
	    || integer_all_onesp (op1)
	    || real_zerop (op1)
	    || real_onep (op1)
	    || real_minus_onep (op1))
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
	break;

      default:
	break;
      }
}
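
/* Worked example, assuming the default BUILTIN_EXPECT_PROBABILITY of 90
   and REG_BR_PROB_BASE of 10000: for __builtin_expect (cond, 1) the
   expected value is nonzero, so the TRUE edge gets HITRATE (90) == 9000,
   i.e. a 90% probability; for __builtin_expect (cond, 0) the expected
   value is zero, PERCENT becomes 100 - 90 == 10 and the TRUE edge gets
   HITRATE (10) == 1000.  */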

/* Try to guess whether the return value VAL represents an error code.  */

static enum br_predictor
return_prediction (tree val, enum prediction *prediction)
{
  /* VOID.  */
  if (!val)
    return PRED_NO_PREDICTION;
  /* Different heuristics for pointers and scalars.  */
  if (POINTER_TYPE_P (TREE_TYPE (val)))
    {
      /* NULL is usually not returned.  */
      if (integer_zerop (val))
	{
	  *prediction = NOT_TAKEN;
	  return PRED_NULL_RETURN;
	}
    }
  else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
    {
      /* Negative return values are often used to indicate
         errors.  */
      if (TREE_CODE (val) == INTEGER_CST
	  && tree_int_cst_sgn (val) < 0)
	{
	  *prediction = NOT_TAKEN;
	  return PRED_NEGATIVE_RETURN;
	}
      /* Constant return values seem to be commonly taken.
         Zero/one often represent booleans, so exclude them from this
	 heuristic.  */
      if (TREE_CONSTANT (val)
	  && (!integer_zerop (val) && !integer_onep (val)))
	{
	  *prediction = TAKEN;
	  return PRED_CONST_RETURN;
	}
    }
  return PRED_NO_PREDICTION;
}
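
/* Hypothetical examples of what the heuristics above catch:

     return NULL;   -- PRED_NULL_RETURN, predicted not taken
     return -1;     -- PRED_NEGATIVE_RETURN, predicted not taken
     return 42;     -- PRED_CONST_RETURN, predicted taken
     return 0;      -- no prediction; zero/one are likely booleans  */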

/* Find the basic block containing the return expression, look up its
   possible return values and try to apply the RETURN_PREDICTION
   heuristics.  */
static void
apply_return_prediction (void)
{
  gimple return_stmt = NULL;
  tree return_val;
  edge e;
  gimple phi;
  int phi_num_args, i;
  enum br_predictor pred;
  enum prediction direction;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      return_stmt = last_stmt (e->src);
      if (return_stmt
	  && gimple_code (return_stmt) == GIMPLE_RETURN)
	break;
    }
  if (!e)
    return;
  return_val = gimple_return_retval (return_stmt);
  if (!return_val)
    return;
  if (TREE_CODE (return_val) != SSA_NAME
      || !SSA_NAME_DEF_STMT (return_val)
      || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
    return;
  phi = SSA_NAME_DEF_STMT (return_val);
  phi_num_args = gimple_phi_num_args (phi);
  pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);

  /* Avoid the degenerate case where all return values from the function
     belong to the same category (i.e. they are all positive constants),
     in which case we can hardly say anything about them.  */
  for (i = 1; i < phi_num_args; i++)
    if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
      break;
  if (i != phi_num_args)
    for (i = 0; i < phi_num_args; i++)
      {
	pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
	if (pred != PRED_NO_PREDICTION)
	  predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
				         direction);
      }
}
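
/* A sketch of the situation this handles, on a hypothetical function:

     void *f (int key)
     {
       if (key < 0)
         return NULL;
       return lookup (key);
     }

   Once the returns have been merged, the return value is a PHI of NULL
   and the lookup result; the NULL argument yields PRED_NULL_RETURN, so
   all paths leading to the corresponding edge (the KEY < 0 branch) are
   predicted as unlikely.  */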

/* Look for basic blocks that contain unlikely-to-happen events
   (such as noreturn calls) and mark all paths leading to execution
   of these basic blocks as unlikely.  */

static void
tree_bb_level_predictions (void)
{
  basic_block bb;
  bool has_return_edges = false;
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
      {
        has_return_edges = true;
	break;
      }

  apply_return_prediction ();

  FOR_EACH_BB_FN (bb, cfun)
    {
      gimple_stmt_iterator gsi;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree decl;

	  if (is_gimple_call (stmt))
	    {
	      if ((gimple_call_flags (stmt) & ECF_NORETURN)
	          && has_return_edges)
		predict_paths_leading_to (bb, PRED_NORETURN,
					  NOT_TAKEN);
	      decl = gimple_call_fndecl (stmt);
	      if (decl
		  && lookup_attribute ("cold",
				       DECL_ATTRIBUTES (decl)))
		predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
					  NOT_TAKEN);
	    }
	  else if (gimple_code (stmt) == GIMPLE_PREDICT)
	    {
	      predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
					gimple_predict_outcome (stmt));
	      /* Keep GIMPLE_PREDICT around so early inlining will propagate
	         hints to callers.  */
	    }
	}
    }
}
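
/* For example, on a hypothetical fragment:

     if (!ptr)
       abort ();    -- noreturn call

   all paths leading to the block with the abort call are predicted not
   taken by PRED_NORETURN; similarly, a call to a function declared with
   __attribute__ ((cold)) marks the paths leading to it with
   PRED_COLD_FUNCTION.  */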

#ifdef ENABLE_CHECKING

/* Callback for pointer_map_traverse; asserts that the pointer map is
   empty.  */

static bool
assert_is_empty (const void *key ATTRIBUTE_UNUSED, void **value,
		 void *data ATTRIBUTE_UNUSED)
{
  gcc_assert (!*value);
  return false;
}
#endif

/* Predict branch probabilities and estimate profile for basic block BB.  */

static void
tree_estimate_probability_bb (basic_block bb)
{
  edge e;
  edge_iterator ei;
  gimple last;

  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      /* Predict edges to user labels with attributes.  */
      if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
	{
	  gimple_stmt_iterator gi;
	  for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
	    {
	      gimple stmt = gsi_stmt (gi);
	      tree decl;

	      if (gimple_code (stmt) != GIMPLE_LABEL)
		break;
	      decl = gimple_label_label (stmt);
	      if (DECL_ARTIFICIAL (decl))
		continue;

	      /* Finally, we have a user-defined label.  */
	      if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
		predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
	      else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
		predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
	    }
	}

      /* Predict early returns to be probable, as we've already taken
	 care of error returns, and the other cases are often used for
	 fast paths through the function.

	 Since we've already removed the return statements, we are
	 looking for a CFG like:

	 if (conditional)
	   {
	     ..
	     goto return_block
	   }
	 some other blocks
	 return_block:
	 return_stmt.  */
      if (e->dest != bb->next_bb
	  && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
	  && single_succ_p (e->dest)
	  && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
	  && (last = last_stmt (e->dest)) != NULL
	  && gimple_code (last) == GIMPLE_RETURN)
	{
	  edge e1;
	  edge_iterator ei1;

	  if (single_succ_p (bb))
	    {
	      FOR_EACH_EDGE (e1, ei1, bb->preds)
		if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
		    && !predicted_by_p (e1->src, PRED_CONST_RETURN)
		    && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
		  predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
	    }
	  else
	    if (!predicted_by_p (e->src, PRED_NULL_RETURN)
		&& !predicted_by_p (e->src, PRED_CONST_RETURN)
		&& !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
	      predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
	}

      /* Look for a block we are guarding (i.e. we dominate it,
	 but it doesn't postdominate us).  */
      if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
	  && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
	  && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
	{
	  gimple_stmt_iterator bi;

	  /* The call heuristic claims that a guarded function call
	     is improbable.  This is because such calls are often used
	     to signal exceptional situations such as printing error
	     messages.  */
	  for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
	       gsi_next (&bi))
	    {
	      gimple stmt = gsi_stmt (bi);
	      if (is_gimple_call (stmt)
		  /* Constant and pure calls are hardly ever used to
		     signal something exceptional.  */
		  && gimple_has_side_effects (stmt))
		{
		  predict_edge_def (e, PRED_CALL, NOT_TAKEN);
		  break;
		}
	    }
	}
    }
  tree_predict_by_opcode (bb);
}
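
/* As an example of the call heuristic above, on a hypothetical fragment:

     if (err)
       fprintf (stderr, "bad input\n");

   the guarded block contains a call with side effects, so the edge into
   it is predicted not taken by PRED_CALL; a guarded call to a const or
   pure function would not trigger the heuristic.  */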

/* Predict branch probabilities and estimate profile of the tree CFG.
   This function can be called from the loop optimizers to recompute
   the profile information.  */

void
tree_estimate_probability (void)
{
  basic_block bb;

  add_noreturn_fake_exit_edges ();
  connect_infinite_loops_to_exit ();
  /* We use loop_niter_by_eval, which requires that the loops have
     preheaders.  */
  create_preheaders (CP_SIMPLE_PREHEADERS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  bb_predictions = pointer_map_create ();
  tree_bb_level_predictions ();
  record_loop_exits ();

  if (number_of_loops (cfun) > 1)
    predict_loops ();

  FOR_EACH_BB_FN (bb, cfun)
    tree_estimate_probability_bb (bb);

  FOR_EACH_BB_FN (bb, cfun)
    combine_predictions_for_bb (bb);

#ifdef ENABLE_CHECKING
  pointer_map_traverse (bb_predictions, assert_is_empty, NULL);
#endif
  pointer_map_destroy (bb_predictions);
  bb_predictions = NULL;

  estimate_bb_frequencies (false);
  free_dominance_info (CDI_POST_DOMINATORS);
  remove_fake_exit_edges ();
}

/* Predict edges to successors of CUR whose sources are not postdominated
   by BB using predictor PRED, and recurse to all postdominators.  */

static void
predict_paths_for_bb (basic_block cur, basic_block bb,
		      enum br_predictor pred,
		      enum prediction taken,
		      bitmap visited)
{
  edge e;
  edge_iterator ei;
  basic_block son;

  /* We are looking for all edges forming an edge cut induced by the
     set of all blocks postdominated by BB.  */
  FOR_EACH_EDGE (e, ei, cur->preds)
    if (e->src->index >= NUM_FIXED_BLOCKS
	&& !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
    {
      edge e2;
      edge_iterator ei2;
      bool found = false;

      /* Ignore fake and EH edges; we predict them as not taken anyway.  */
      if (e->flags & (EDGE_EH | EDGE_FAKE))
	continue;
      gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));

      /* See if there is an edge from e->src that is not abnormal
	 and does not lead to BB.  */
      FOR_EACH_EDGE (e2, ei2, e->src->succs)
	if (e2 != e
	    && !(e2->flags & (EDGE_EH | EDGE_FAKE))
	    && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
	  {
	    found = true;
	    break;
	  }

      /* If there is a non-abnormal path leaving e->src, predict the edge
	 using the predictor.  Otherwise we need to look for paths
	 leading to e->src.

	 The second case may lead to an infinite loop when we are
	 predicting regions that are only reachable by abnormal edges.
	 We simply prevent visiting a given BB twice.  */
      if (found)
        predict_edge_def (e, pred, taken);
      else if (bitmap_set_bit (visited, e->src->index))
	predict_paths_for_bb (e->src, e->src, pred, taken, visited);
    }
  for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    predict_paths_for_bb (son, bb, pred, taken, visited);
}

/* Set branch probabilities on all edges of paths leading to BB
   according to PRED and TAKEN.  */

static void
predict_paths_leading_to (basic_block bb, enum br_predictor pred,
			  enum prediction taken)
{
  bitmap visited = BITMAP_ALLOC (NULL);
  predict_paths_for_bb (bb, bb, pred, taken, visited);
  BITMAP_FREE (visited);
}

/* Like predict_paths_leading_to, but takes an edge instead of a basic
   block.  */

static void
predict_paths_leading_to_edge (edge e, enum br_predictor pred,
			       enum prediction taken)
{
  bool has_nonloop_edge = false;
  edge_iterator ei;
  edge e2;

  basic_block bb = e->src;
  FOR_EACH_EDGE (e2, ei, bb->succs)
    if (e2->dest != e->src && e2->dest != e->dest
	&& !(e2->flags & (EDGE_EH | EDGE_FAKE))
	&& !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
      {
	has_nonloop_edge = true;
	break;
      }
  if (!has_nonloop_edge)
    {
      bitmap visited = BITMAP_ALLOC (NULL);
      predict_paths_for_bb (bb, bb, pred, taken, visited);
      BITMAP_FREE (visited);
    }
  else
    predict_edge_def (e, pred, taken);
}

/* This is used to carry information about basic blocks.  It is
   attached to the AUX field of the standard CFG block.  */

typedef struct block_info_def
{
  /* Estimated frequency of execution of basic_block.  */
  sreal frequency;

  /* To keep queue of basic blocks to process.  */
  basic_block next;

  /* Number of predecessors we need to visit first.  */
  int npredecessors;
} *block_info;

/* Similar information for edges.  */
typedef struct edge_info_def
{
  /* In the case the edge is a loopback edge, the probability that the
     edge will be reached given that the header is.  The estimated number
     of iterations of the loop can then be computed as
     1 / (1 - back_edge_prob).  */
  sreal back_edge_prob;
  /* True if the edge is a loopback edge in the natural loop.  */
  unsigned int back_edge:1;
} *edge_info;

#define BLOCK_INFO(B)	((block_info) (B)->aux)
#define EDGE_INFO(E)	((edge_info) (E)->aux)
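
/* Worked example: if the loop latch is reached with probability 0.9
   whenever the header is, back_edge_prob is 0.9 and the estimated
   iteration count is 1 / (1 - 0.9) == 10.  */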

/* Helper function for estimate_bb_frequencies.
   Propagate the frequencies in blocks marked in
   TOVISIT, starting in HEAD.  */

static void
propagate_freq (basic_block head, bitmap tovisit)
{
  basic_block bb;
  basic_block last;
  unsigned i;
  edge e;
  basic_block nextbb;
  bitmap_iterator bi;

  /* For each basic block we need to visit, count the number of its
     predecessors that we need to visit first.  */
  EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
    {
      edge_iterator ei;
      int count = 0;

      bb = BASIC_BLOCK_FOR_FN (cfun, i);

      FOR_EACH_EDGE (e, ei, bb->preds)
	{
	  bool visit = bitmap_bit_p (tovisit, e->src->index);

	  if (visit && !(e->flags & EDGE_DFS_BACK))
	    count++;
	  else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
	    fprintf (dump_file,
		     "Irreducible region hit, ignoring edge to %i->%i\n",
		     e->src->index, bb->index);
	}
      BLOCK_INFO (bb)->npredecessors = count;
      /* When the function never returns, we will never process the exit
	 block.  */
      if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
	bb->count = bb->frequency = 0;
    }

  memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
  last = head;
  for (bb = head; bb; bb = nextbb)
    {
      edge_iterator ei;
      sreal cyclic_probability, frequency;

      memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
      memcpy (&frequency, &real_zero, sizeof (real_zero));

      nextbb = BLOCK_INFO (bb)->next;
      BLOCK_INFO (bb)->next = NULL;

      /* Compute frequency of basic block.  */
      if (bb != head)
	{
#ifdef ENABLE_CHECKING
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
			|| (e->flags & EDGE_DFS_BACK));
#endif

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_INFO (e)->back_edge)
	      {
		sreal_add (&cyclic_probability, &cyclic_probability,
			   &EDGE_INFO (e)->back_edge_prob);
	      }
	    else if (!(e->flags & EDGE_DFS_BACK))
	      {
		sreal tmp;

		/*  frequency += (e->probability
				  * BLOCK_INFO (e->src)->frequency /
				  REG_BR_PROB_BASE);  */

		sreal_init (&tmp, e->probability, 0);
		sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
		sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
		sreal_add (&frequency, &frequency, &tmp);
	      }

	  if (sreal_compare (&cyclic_probability, &real_zero) == 0)
	    {
	      memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
		      sizeof (frequency));
	    }
	  else
	    {
	      if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
		{
		  memcpy (&cyclic_probability, &real_almost_one,
			  sizeof (real_almost_one));
		}

	      /* BLOCK_INFO (bb)->frequency = frequency
					      / (1 - cyclic_probability) */

	      sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
	      sreal_div (&BLOCK_INFO (bb)->frequency,
			 &frequency, &cyclic_probability);
	    }
	}

      bitmap_clear_bit (tovisit, bb->index);

      e = find_edge (bb, head);
      if (e)
	{
	  sreal tmp;

	  /* EDGE_INFO (e)->back_edge_prob
	     = ((e->probability * BLOCK_INFO (bb)->frequency)
	     / REG_BR_PROB_BASE); */

	  sreal_init (&tmp, e->probability, 0);
	  sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
	  sreal_mul (&EDGE_INFO (e)->back_edge_prob,
		     &tmp, &real_inv_br_prob_base);
	}

      /* Propagate to successor blocks.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (!(e->flags & EDGE_DFS_BACK)
	    && BLOCK_INFO (e->dest)->npredecessors)
	  {
	    BLOCK_INFO (e->dest)->npredecessors--;
	    if (!BLOCK_INFO (e->dest)->npredecessors)
	      {
		if (!nextbb)
		  nextbb = e->dest;
		else
		  BLOCK_INFO (last)->next = e->dest;

		last = e->dest;
	      }
	  }
    }
}
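
/* Worked example of the propagation above, on hypothetical CFGs: for a
   single-block loop whose back edge has probability 0.75, the back edge
   contributes cyclic_probability == 0.75, so the header frequency becomes
   1 / (1 - 0.75) == 4 times the frequency with which the loop is entered.
   For an acyclic diamond with a 60%/40% branch, the two arms get
   frequencies 0.6 and 0.4 and the join block sums them back to 1.0.  */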

/* Estimate frequencies in loops at the same nest level.  */

static void
estimate_loops_at_level (struct loop *first_loop)
{
  struct loop *loop;

  for (loop = first_loop; loop; loop = loop->next)
    {
      edge e;
      basic_block *bbs;
      unsigned i;
      bitmap tovisit = BITMAP_ALLOC (NULL);

      estimate_loops_at_level (loop->inner);

      /* Find the current loop's back edge and mark it.  */
      e = loop_latch_edge (loop);
      EDGE_INFO (e)->back_edge = 1;

      bbs = get_loop_body (loop);
      for (i = 0; i < loop->num_nodes; i++)
	bitmap_set_bit (tovisit, bbs[i]->index);
      free (bbs);
      propagate_freq (loop->header, tovisit);
      BITMAP_FREE (tovisit);
    }
}

/* Propagate frequencies through the structure of loops.  */

static void
estimate_loops (void)
{
  bitmap tovisit = BITMAP_ALLOC (NULL);
  basic_block bb;

  /* Start by estimating the frequencies in the loops.  */
  if (number_of_loops (cfun) > 1)
    estimate_loops_at_level (current_loops->tree_root->inner);

  /* Now propagate the frequencies through all the blocks.  */
  FOR_ALL_BB_FN (bb, cfun)
    {
      bitmap_set_bit (tovisit, bb->index);
    }
  propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
  BITMAP_FREE (tovisit);
}

/* Drop the profile for NODE to guessed, and update its frequency based on
   whether it is expected to be hot given the CALL_COUNT.  */

static void
drop_profile (struct cgraph_node *node, gcov_type call_count)
{
  struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
  /* In the case where this was called by another function with a
     dropped profile, call_count will be 0.  Since there are no
     non-zero call counts to this function, we don't know for sure
     whether it is hot, and therefore it will be marked normal below.  */
  bool hot = maybe_hot_count_p (NULL, call_count);

  if (dump_file)
    fprintf (dump_file,
             "Dropping 0 profile for %s/%i. %s based on calls.\n",
             node->name (), node->order,
             hot ? "Function is hot" : "Function is normal");
  /* We only expect to miss profiles for functions that are reached
     via non-zero call edges in cases where the function may have
     been linked from another module or library (COMDATs and extern
     templates).  See the comments below for handle_missing_profiles.
     Also, only warn in cases where the missing counts exceed the
     number of training runs.  In certain cases with an execv followed
     by a no-return call the profile for the no-return call is not
     dumped and there can be a mismatch.  */
  if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
      && call_count > profile_info->runs)
    {
      if (flag_profile_correction)
        {
          if (dump_file)
            fprintf (dump_file,
                     "Missing counts for called function %s/%i\n",
                     node->name (), node->order);
        }
      else
        warning (0, "Missing counts for called function %s/%i",
                 node->name (), node->order);
    }

  profile_status_for_fn (fn)
      = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
  node->frequency
      = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
}
2782
2783/* In the case of COMDAT routines, multiple object files will contain the same
2784   function and the linker will select one for the binary. In that case
2785   all the other copies from the profile instrument binary will be missing
2786   profile counts. Look for cases where this happened, due to non-zero
2787   call counts going to 0-count functions, and drop the profile to guessed
2788   so that we can use the estimated probabilities and avoid optimizing only
2789   for size.
2790
2791   The other case where the profile may be missing is when the routine
2792   is not going to be emitted to the object file, e.g. for "extern template"
2793   class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2794   all other cases of non-zero calls to 0-count functions.  */
2795
2796void
2797handle_missing_profiles (void)
2798{
2799  struct cgraph_node *node;
2800  int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2801  vec<struct cgraph_node *> worklist;
2802  worklist.create (64);
2803
2804  /* See if 0 count function has non-0 count callers.  In this case we
2805     lost some profile.  Drop its function profile to PROFILE_GUESSED.  */
2806  FOR_EACH_DEFINED_FUNCTION (node)
2807    {
2808      struct cgraph_edge *e;
2809      gcov_type call_count = 0;
2810      gcov_type max_tp_first_run = 0;
2811      struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2812
2813      if (node->count)
2814        continue;
2815      for (e = node->callers; e; e = e->next_caller)
2816      {
2817        call_count += e->count;
2818
2819	if (e->caller->tp_first_run > max_tp_first_run)
2820	  max_tp_first_run = e->caller->tp_first_run;
2821      }
2822
2823      /* If time profile is missing, let assign the maximum that comes from
2824	 caller functions.  */
2825      if (!node->tp_first_run && max_tp_first_run)
2826	node->tp_first_run = max_tp_first_run + 1;
2827
2828      if (call_count
2829          && fn && fn->cfg
2830          && (call_count * unlikely_count_fraction >= profile_info->runs))
2831        {
2832          drop_profile (node, call_count);
2833          worklist.safe_push (node);
2834        }
2835    }

  /* Propagate the profile dropping to other 0-count COMDATs that are
     potentially called by COMDATs we already dropped the profile on.  */
  while (worklist.length () > 0)
    {
      struct cgraph_edge *e;

      node = worklist.pop ();
      for (e = node->callees; e; e = e->next_callee)
        {
          struct cgraph_node *callee = e->callee;
          struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);

          if (callee->count > 0)
            continue;
          if (DECL_COMDAT (callee->decl) && fn && fn->cfg
              && profile_status_for_fn (fn) == PROFILE_READ)
            {
              drop_profile (callee, 0);
              worklist.safe_push (callee);
            }
        }
    }
  worklist.release ();
}

/* Convert counts measured by profile-driven feedback to frequencies.
   Return nonzero iff there was any nonzero execution count.  */

int
counts_to_freqs (void)
{
  gcov_type count_max, true_count_max = 0;
  basic_block bb;

  /* Don't overwrite the estimated frequencies when the profile for
     the function is missing.  We may drop this function's profile to
     PROFILE_GUESSED later in drop_profile ().  */
  if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
    return 0;

  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
    true_count_max = MAX (bb->count, true_count_max);

  count_max = MAX (true_count_max, 1);
  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
    bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;

  return true_count_max;
}
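
/* Worked example, assuming BB_FREQ_MAX of 10000: with true_count_max
   == 1000, a block with count 250 gets frequency
   (250 * 10000 + 500) / 1000 == 2500; counts are scaled so that the
   hottest block lands exactly at BB_FREQ_MAX.  */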

/* Return true if the function is likely to be expensive, so there is no
   point in optimizing the performance of the prologue and epilogue or
   doing inlining at the expense of code size growth.  THRESHOLD is the
   limit on the number of instructions the function can execute on
   average to still be considered not expensive.  */

bool
expensive_function_p (int threshold)
{
  unsigned int sum = 0;
  basic_block bb;
  unsigned int limit;

  /* We cannot compute this accurately for large thresholds due to the
     scaled frequencies.  */
  gcc_assert (threshold <= BB_FREQ_MAX);

  /* The frequencies are out of range.  This either means that the
     function contains an internal loop executing more than BB_FREQ_MAX
     times, or profile feedback is available and the function has not
     been executed at all.  */
  if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
    return true;

  /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
  limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx insn;

      FOR_BB_INSNS (bb, insn)
	if (active_insn_p (insn))
	  {
	    sum += bb->frequency;
	    if (sum > limit)
	      return true;
	  }
    }

  return false;
}
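
/* Worked example: with an entry block frequency of 1000 and THRESHOLD
   of 20, LIMIT is 20000.  A block with frequency 700 containing 30
   active insns contributes 30 * 700 == 21000 > LIMIT, so the function
   is considered expensive.  */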

/* Estimate and propagate basic block frequencies using the given branch
   probabilities.  If FORCE is true, the frequencies are used to estimate
   the counts even when there are already non-zero profile counts.  */

void
estimate_bb_frequencies (bool force)
{
  basic_block bb;
  sreal freq_max;

  if (force
      || profile_status_for_fn (cfun) != PROFILE_READ
      || !counts_to_freqs ())
    {
      static int real_values_initialized = 0;

      if (!real_values_initialized)
        {
	  real_values_initialized = 1;
	  sreal_init (&real_zero, 0, 0);
	  sreal_init (&real_one, 1, 0);
	  sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
	  sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
	  sreal_init (&real_one_half, 1, -1);
	  sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
	  sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
	}

      mark_dfs_back_edges ();

      single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
	 REG_BR_PROB_BASE;

      /* Set up block info for each basic block.  */
      alloc_aux_for_blocks (sizeof (struct block_info_def));
      alloc_aux_for_edges (sizeof (struct edge_info_def));
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
	{
	  edge e;
	  edge_iterator ei;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
	      sreal_mul (&EDGE_INFO (e)->back_edge_prob,
			 &EDGE_INFO (e)->back_edge_prob,
			 &real_inv_br_prob_base);
	    }
	}

      /* First compute frequencies locally for each loop from innermost
         to outermost to examine frequencies for back edges.  */
      estimate_loops ();

      memcpy (&freq_max, &real_zero, sizeof (real_zero));
      FOR_EACH_BB_FN (bb, cfun)
	if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
	  memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));

      sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
	{
	  sreal tmp;

	  sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
	  sreal_add (&tmp, &tmp, &real_one_half);
	  bb->frequency = sreal_to_int (&tmp);
	}

      free_aux_for_blocks ();
      free_aux_for_edges ();
    }
  compute_function_frequency ();
}

/* Decide whether the function is hot, cold or unlikely executed.  */
void
compute_function_frequency (void)
{
  basic_block bb;
  struct cgraph_node *node = cgraph_get_node (current_function_decl);

  if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
      || MAIN_NAME_P (DECL_NAME (current_function_decl)))
    node->only_called_at_startup = true;
  if (DECL_STATIC_DESTRUCTOR (current_function_decl))
    node->only_called_at_exit = true;

  if (profile_status_for_fn (cfun) != PROFILE_READ)
    {
      int flags = flags_from_decl_or_type (current_function_decl);
      if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
	  != NULL)
        node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
      else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
	       != NULL)
        node->frequency = NODE_FREQUENCY_HOT;
      else if (flags & ECF_NORETURN)
        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
	       || DECL_STATIC_DESTRUCTOR (current_function_decl))
        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      return;
    }

  /* Only the first time through, try to drop the function into unlikely
     executed; after inlining, roundoff errors may confuse us.  The
     ipa-profile pass will drop functions only called from unlikely
     functions to unlikely, and that is most of what we care about.  */
  if (!cfun->after_inlining)
    node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
  FOR_EACH_BB_FN (bb, cfun)
    {
      if (maybe_hot_bb_p (cfun, bb))
	{
	  node->frequency = NODE_FREQUENCY_HOT;
	  return;
	}
      if (!probably_never_executed_bb_p (cfun, bb))
	node->frequency = NODE_FREQUENCY_NORMAL;
    }
}

/* Build a PREDICT_EXPR.  */
tree
build_predict_expr (enum br_predictor predictor, enum prediction taken)
{
  tree t = build1 (PREDICT_EXPR, void_type_node,
		   build_int_cst (integer_type_node, predictor));
  SET_PREDICT_EXPR_OUTCOME (t, taken);
  return t;
}

/* Return the name of PREDICTOR as used in the debugging dumps.  */

const char *
predictor_name (enum br_predictor predictor)
{
  return predictor_info[predictor].name;
}

/* Predict branch probabilities and estimate profile of the tree CFG.  */

namespace {

const pass_data pass_data_profile =
{
  GIMPLE_PASS, /* type */
  "profile_estimate", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  true, /* has_execute */
  TV_BRANCH_PROB, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_verify_ssa, /* todo_flags_finish */
};

class pass_profile : public gimple_opt_pass
{
public:
  pass_profile (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_profile, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_guess_branch_prob; }
  virtual unsigned int execute (function *);

}; // class pass_profile

unsigned int
pass_profile::execute (function *fun)
{
  unsigned nb_loops;

  loop_optimizer_init (LOOPS_NORMAL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    flow_loops_dump (dump_file, NULL, 0);

  mark_irreducible_loops ();

  nb_loops = number_of_loops (fun);
  if (nb_loops > 1)
    scev_initialize ();

  tree_estimate_probability ();

  if (nb_loops > 1)
    scev_finalize ();

  loop_optimizer_finalize ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    gimple_dump_cfg (dump_file, dump_flags);
  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
    profile_status_for_fn (fun) = PROFILE_GUESSED;
  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_profile (gcc::context *ctxt)
{
  return new pass_profile (ctxt);
}

namespace {

const pass_data pass_data_strip_predict_hints =
{
  GIMPLE_PASS, /* type */
  "*strip_predict_hints", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  true, /* has_execute */
  TV_BRANCH_PROB, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_verify_ssa, /* todo_flags_finish */
};

class pass_strip_predict_hints : public gimple_opt_pass
{
public:
  pass_strip_predict_hints (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
  virtual unsigned int execute (function *);

}; // class pass_strip_predict_hints

/* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
   we no longer need.  */
unsigned int
pass_strip_predict_hints::execute (function *fun)
{
  basic_block bb;
  gimple ass_stmt;
  tree var;

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator bi;
      for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
	{
	  gimple stmt = gsi_stmt (bi);

	  if (gimple_code (stmt) == GIMPLE_PREDICT)
	    {
	      gsi_remove (&bi, true);
	      continue;
	    }
	  else if (is_gimple_call (stmt))
	    {
	      tree fndecl = gimple_call_fndecl (stmt);

	      if ((fndecl
		   && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
		   && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
		   && gimple_call_num_args (stmt) == 2)
		  || (gimple_call_internal_p (stmt)
		      && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
		{
		  var = gimple_call_lhs (stmt);
		  if (var)
		    {
		      ass_stmt
			= gimple_build_assign (var, gimple_call_arg (stmt, 0));
		      gsi_replace (&bi, ass_stmt, true);
		    }
		  else
		    {
		      gsi_remove (&bi, true);
		      continue;
		    }
		}
	    }
	  gsi_next (&bi);
	}
    }
  return 0;
}
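
/* The transformation performed above, on a hypothetical fragment:

     x = __builtin_expect (y, 1);   -- becomes   x = y;
     __builtin_expect (z, 0);       -- removed entirely

   and any GIMPLE_PREDICT statements are simply deleted.  */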

} // anon namespace

gimple_opt_pass *
make_pass_strip_predict_hints (gcc::context *ctxt)
{
  return new pass_strip_predict_hints (ctxt);
}

/* Rebuild function frequencies.  Passes are in general expected to
   maintain the profile by hand; however, in some cases this is not
   possible: for example, when inlining several functions with loops,
   frequencies might run out of scale and thus need to be recomputed.  */

void
rebuild_frequencies (void)
{
  timevar_push (TV_REBUILD_FREQUENCIES);

  /* When the max bb count in the function is small, there is a higher
     chance that there were truncation errors in the integer scaling
     of counts by inlining and other optimizations.  This could lead
     to incorrect classification of code as being cold when it isn't.
     In that case, force the estimation of bb counts/frequencies from the
     branch probabilities, rather than computing frequencies from counts,
     which may also lead to frequencies incorrectly reduced to 0.  There
     is less precision in the probabilities, so we only do this for small
     max counts.  */
  gcov_type count_max = 0;
  basic_block bb;
  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
    count_max = MAX (bb->count, count_max);

  if (profile_status_for_fn (cfun) == PROFILE_GUESSED
      || (profile_status_for_fn (cfun) == PROFILE_READ
	  && count_max < REG_BR_PROB_BASE / 10))
    {
      loop_optimizer_init (0);
      add_noreturn_fake_exit_edges ();
      mark_irreducible_loops ();
      connect_infinite_loops_to_exit ();
      estimate_bb_frequencies (true);
      remove_fake_exit_edges ();
      loop_optimizer_finalize ();
    }
  else if (profile_status_for_fn (cfun) == PROFILE_READ)
    counts_to_freqs ();
  else
    gcc_unreachable ();
  timevar_pop (TV_REBUILD_FREQUENCIES);
}