1/* Branch prediction routines for the GNU compiler.
2   Copyright (C) 2000-2014 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8Software Foundation; either version 3, or (at your option) any later
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3.  If not see
18<http://www.gnu.org/licenses/>.  */
19
20/* References:
21
22   [1] "Branch Prediction for Free"
23       Ball and Larus; PLDI '93.
24   [2] "Static Branch Frequency and Program Profile Analysis"
25       Wu and Larus; MICRO-27.
26   [3] "Corpus-based Static Branch Prediction"
27       Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
28
29
30#include "config.h"
31#include "system.h"
32#include "coretypes.h"
33#include "tm.h"
34#include "tree.h"
35#include "calls.h"
36#include "rtl.h"
37#include "tm_p.h"
38#include "hard-reg-set.h"
39#include "basic-block.h"
40#include "insn-config.h"
41#include "regs.h"
42#include "flags.h"
43#include "function.h"
44#include "except.h"
45#include "diagnostic-core.h"
46#include "recog.h"
47#include "expr.h"
48#include "predict.h"
49#include "coverage.h"
50#include "sreal.h"
51#include "params.h"
52#include "target.h"
53#include "cfgloop.h"
54#include "pointer-set.h"
55#include "tree-ssa-alias.h"
56#include "internal-fn.h"
57#include "gimple-expr.h"
58#include "is-a.h"
59#include "gimple.h"
60#include "gimple-iterator.h"
61#include "gimple-ssa.h"
62#include "cgraph.h"
63#include "tree-cfg.h"
64#include "tree-phinodes.h"
65#include "ssa-iterators.h"
66#include "tree-ssa-loop-niter.h"
67#include "tree-ssa-loop.h"
68#include "tree-pass.h"
69#include "tree-scalar-evolution.h"
70#include "cfgloop.h"
71
72/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
73		   1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
74static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
75	     real_inv_br_prob_base, real_one_half, real_bb_freq_max;
76
77static void combine_predictions_for_insn (rtx, basic_block);
78static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
79static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
80static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
81static bool can_predict_insn_p (const_rtx);
82
83/* Information we hold about each branch predictor.
84   Filled using information from predict.def.  */
85
86struct predictor_info
87{
88  const char *const name;	/* Name used in the debugging dumps.  */
89  const int hitrate;		/* Expected hitrate used by
90				   predict_insn_def call.  */
91  const int flags;
92};
93
94/* Use given predictor without Dempster-Shaffer theory if it matches
95   using first_match heuristics.  */
96#define PRED_FLAG_FIRST_MATCH 1
97
98/* Recompute hitrate in percent to our representation.  */
99
100#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
101
102#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
103static const struct predictor_info predictor_info[]= {
104#include "predict.def"
105
106  /* Upper bound on predictors.  */
107  {NULL, 0, 0}
108};
109#undef DEF_PREDICTOR
110
111/* Return TRUE if frequency FREQ is considered to be hot.  */
112
113static inline bool
114maybe_hot_frequency_p (struct function *fun, int freq)
115{
116  struct cgraph_node *node = cgraph_get_node (fun->decl);
117  if (!profile_info || !flag_branch_probabilities)
118    {
119      if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
120        return false;
121      if (node->frequency == NODE_FREQUENCY_HOT)
122        return true;
123    }
124  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
125    return true;
126  if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
127      && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
128    return false;
129  if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
130    return false;
131  if (freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency
132	      / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
133    return false;
134  return true;
135}
136
137static gcov_type min_count = -1;
138
139/* Determine the threshold for hot BB counts.  */
140
141gcov_type
142get_hot_bb_threshold ()
143{
144  gcov_working_set_t *ws;
145  if (min_count == -1)
146    {
147      ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
148      gcc_assert (ws);
149      min_count = ws->min_counter;
150    }
151  return min_count;
152}
153
154/* Set the threshold for hot BB counts.  */
155
156void
157set_hot_bb_threshold (gcov_type min)
158{
159  min_count = min;
160}
161
162/* Return TRUE if frequency FREQ is considered to be hot.  */
163
164static inline bool
165maybe_hot_count_p (struct function *fun, gcov_type count)
166{
167  if (fun && profile_status_for_fn (fun) != PROFILE_READ)
168    return true;
169  /* Code executed at most once is not hot.  */
170  if (profile_info->runs >= count)
171    return false;
172  return (count >= get_hot_bb_threshold ());
173}
174
175/* Return true in case BB can be CPU intensive and should be optimized
176   for maximal performance.  */
177
178bool
179maybe_hot_bb_p (struct function *fun, const_basic_block bb)
180{
181  gcc_checking_assert (fun);
182  if (profile_status_for_fn (fun) == PROFILE_READ)
183    return maybe_hot_count_p (fun, bb->count);
184  return maybe_hot_frequency_p (fun, bb->frequency);
185}
186
187/* Return true if the call can be hot.  */
188
189bool
190cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
191{
192  if (profile_info && flag_branch_probabilities
193      && !maybe_hot_count_p (NULL,
194                             edge->count))
195    return false;
196  if (edge->caller->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED
197      || (edge->callee
198	  && edge->callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
199    return false;
200  if (edge->caller->frequency > NODE_FREQUENCY_UNLIKELY_EXECUTED
201      && (edge->callee
202	  && edge->callee->frequency <= NODE_FREQUENCY_EXECUTED_ONCE))
203    return false;
204  if (optimize_size)
205    return false;
206  if (edge->caller->frequency == NODE_FREQUENCY_HOT)
207    return true;
208  if (edge->caller->frequency == NODE_FREQUENCY_EXECUTED_ONCE
209      && edge->frequency < CGRAPH_FREQ_BASE * 3 / 2)
210    return false;
211  if (flag_guess_branch_prob)
212    {
213      if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0
214	  || edge->frequency <= (CGRAPH_FREQ_BASE
215				 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
216        return false;
217    }
218  return true;
219}
220
221/* Return true in case BB can be CPU intensive and should be optimized
222   for maximal performance.  */
223
224bool
225maybe_hot_edge_p (edge e)
226{
227  if (profile_status_for_fn (cfun) == PROFILE_READ)
228    return maybe_hot_count_p (cfun, e->count);
229  return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
230}
231
232
233
234/* Return true if profile COUNT and FREQUENCY, or function FUN static
235   node frequency reflects never being executed.  */
236
237static bool
238probably_never_executed (struct function *fun,
239                         gcov_type count, int frequency)
240{
241  gcc_checking_assert (fun);
242  if (profile_status_for_fn (cfun) == PROFILE_READ)
243    {
244      int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
245      if (count * unlikely_count_fraction >= profile_info->runs)
246	return false;
247      if (!frequency)
248	return true;
249      if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency)
250	return false;
251      if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
252	{
253          gcov_type computed_count;
254          /* Check for possibility of overflow, in which case entry bb count
255             is large enough to do the division first without losing much
256             precision.  */
257	  if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count < REG_BR_PROB_BASE *
258	      REG_BR_PROB_BASE)
259            {
260              gcov_type scaled_count
261		  = frequency * ENTRY_BLOCK_PTR_FOR_FN (cfun)->count *
262	     unlikely_count_fraction;
263	      computed_count = RDIV (scaled_count,
264				     ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency);
265            }
266          else
267            {
268	      computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count,
269				     ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency);
270              computed_count *= frequency * unlikely_count_fraction;
271            }
272          if (computed_count >= profile_info->runs)
273            return false;
274	}
275      return true;
276    }
277  if ((!profile_info || !flag_branch_probabilities)
278      && (cgraph_get_node (fun->decl)->frequency
279	  == NODE_FREQUENCY_UNLIKELY_EXECUTED))
280    return true;
281  return false;
282}
283
284
285/* Return true in case BB is probably never executed.  */
286
287bool
288probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
289{
290  return probably_never_executed (fun, bb->count, bb->frequency);
291}
292
293
294/* Return true in case edge E is probably never executed.  */
295
296bool
297probably_never_executed_edge_p (struct function *fun, edge e)
298{
299  return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
300}
301
302/* Return true if NODE should be optimized for size.  */
303
304bool
305cgraph_optimize_for_size_p (struct cgraph_node *node)
306{
307  if (optimize_size)
308    return true;
309  if (node && (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
310    return true;
311  else
312    return false;
313}
314
315/* Return true when current function should always be optimized for size.  */
316
317bool
318optimize_function_for_size_p (struct function *fun)
319{
320  if (optimize_size)
321    return true;
322  if (!fun || !fun->decl)
323    return false;
324  return cgraph_optimize_for_size_p (cgraph_get_node (fun->decl));
325}
326
327/* Return true when current function should always be optimized for speed.  */
328
329bool
330optimize_function_for_speed_p (struct function *fun)
331{
332  return !optimize_function_for_size_p (fun);
333}
334
335/* Return TRUE when BB should be optimized for size.  */
336
337bool
338optimize_bb_for_size_p (const_basic_block bb)
339{
340  return optimize_function_for_size_p (cfun) || !maybe_hot_bb_p (cfun, bb);
341}
342
343/* Return TRUE when BB should be optimized for speed.  */
344
345bool
346optimize_bb_for_speed_p (const_basic_block bb)
347{
348  return !optimize_bb_for_size_p (bb);
349}
350
351/* Return TRUE when BB should be optimized for size.  */
352
353bool
354optimize_edge_for_size_p (edge e)
355{
356  return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
357}
358
359/* Return TRUE when BB should be optimized for speed.  */
360
361bool
362optimize_edge_for_speed_p (edge e)
363{
364  return !optimize_edge_for_size_p (e);
365}
366
367/* Return TRUE when BB should be optimized for size.  */
368
369bool
370optimize_insn_for_size_p (void)
371{
372  return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
373}
374
375/* Return TRUE when BB should be optimized for speed.  */
376
377bool
378optimize_insn_for_speed_p (void)
379{
380  return !optimize_insn_for_size_p ();
381}
382
383/* Return TRUE when LOOP should be optimized for size.  */
384
385bool
386optimize_loop_for_size_p (struct loop *loop)
387{
388  return optimize_bb_for_size_p (loop->header);
389}
390
391/* Return TRUE when LOOP should be optimized for speed.  */
392
393bool
394optimize_loop_for_speed_p (struct loop *loop)
395{
396  return optimize_bb_for_speed_p (loop->header);
397}
398
399/* Return TRUE when LOOP nest should be optimized for speed.  */
400
401bool
402optimize_loop_nest_for_speed_p (struct loop *loop)
403{
404  struct loop *l = loop;
405  if (optimize_loop_for_speed_p (loop))
406    return true;
407  l = loop->inner;
408  while (l && l != loop)
409    {
410      if (optimize_loop_for_speed_p (l))
411        return true;
412      if (l->inner)
413        l = l->inner;
414      else if (l->next)
415        l = l->next;
416      else
417        {
418	  while (l != loop && !l->next)
419	    l = loop_outer (l);
420	  if (l != loop)
421	    l = l->next;
422	}
423    }
424  return false;
425}
426
427/* Return TRUE when LOOP nest should be optimized for size.  */
428
429bool
430optimize_loop_nest_for_size_p (struct loop *loop)
431{
432  return !optimize_loop_nest_for_speed_p (loop);
433}
434
435/* Return true when edge E is likely to be well predictable by branch
436   predictor.  */
437
438bool
439predictable_edge_p (edge e)
440{
441  if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
442    return false;
443  if ((e->probability
444       <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
445      || (REG_BR_PROB_BASE - e->probability
446          <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
447    return true;
448  return false;
449}
450
451
452/* Set RTL expansion for BB profile.  */
453
454void
455rtl_profile_for_bb (basic_block bb)
456{
457  crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
458}
459
460/* Set RTL expansion for edge profile.  */
461
462void
463rtl_profile_for_edge (edge e)
464{
465  crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
466}
467
468/* Set RTL expansion to default mode (i.e. when profile info is not known).  */
469void
470default_rtl_profile (void)
471{
472  crtl->maybe_hot_insn_p = true;
473}
474
475/* Return true if the one of outgoing edges is already predicted by
476   PREDICTOR.  */
477
478bool
479rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
480{
481  rtx note;
482  if (!INSN_P (BB_END (bb)))
483    return false;
484  for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
485    if (REG_NOTE_KIND (note) == REG_BR_PRED
486	&& INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
487      return true;
488  return false;
489}
490
491/* This map contains for a basic block the list of predictions for the
492   outgoing edges.  */
493
494static struct pointer_map_t *bb_predictions;
495
496/*  Structure representing predictions in tree level. */
497
498struct edge_prediction {
499    struct edge_prediction *ep_next;
500    edge ep_edge;
501    enum br_predictor ep_predictor;
502    int ep_probability;
503};
504
505/* Return true if the one of outgoing edges is already predicted by
506   PREDICTOR.  */
507
508bool
509gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
510{
511  struct edge_prediction *i;
512  void **preds = pointer_map_contains (bb_predictions, bb);
513
514  if (!preds)
515    return false;
516
517  for (i = (struct edge_prediction *) *preds; i; i = i->ep_next)
518    if (i->ep_predictor == predictor)
519      return true;
520  return false;
521}
522
523/* Return true when the probability of edge is reliable.
524
525   The profile guessing code is good at predicting branch outcome (ie.
526   taken/not taken), that is predicted right slightly over 75% of time.
527   It is however notoriously poor on predicting the probability itself.
528   In general the profile appear a lot flatter (with probabilities closer
529   to 50%) than the reality so it is bad idea to use it to drive optimization
530   such as those disabling dynamic branch prediction for well predictable
531   branches.
532
533   There are two exceptions - edges leading to noreturn edges and edges
534   predicted by number of iterations heuristics are predicted well.  This macro
535   should be able to distinguish those, but at the moment it simply check for
536   noreturn heuristic that is only one giving probability over 99% or bellow
537   1%.  In future we might want to propagate reliability information across the
538   CFG if we find this information useful on multiple places.   */
539static bool
540probability_reliable_p (int prob)
541{
542  return (profile_status_for_fn (cfun) == PROFILE_READ
543	  || (profile_status_for_fn (cfun) == PROFILE_GUESSED
544	      && (prob <= HITRATE (1) || prob >= HITRATE (99))));
545}
546
547/* Same predicate as above, working on edges.  */
548bool
549edge_probability_reliable_p (const_edge e)
550{
551  return probability_reliable_p (e->probability);
552}
553
554/* Same predicate as edge_probability_reliable_p, working on notes.  */
555bool
556br_prob_note_reliable_p (const_rtx note)
557{
558  gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
559  return probability_reliable_p (XINT (note, 0));
560}
561
562static void
563predict_insn (rtx insn, enum br_predictor predictor, int probability)
564{
565  gcc_assert (any_condjump_p (insn));
566  if (!flag_guess_branch_prob)
567    return;
568
569  add_reg_note (insn, REG_BR_PRED,
570		gen_rtx_CONCAT (VOIDmode,
571				GEN_INT ((int) predictor),
572				GEN_INT ((int) probability)));
573}
574
575/* Predict insn by given predictor.  */
576
577void
578predict_insn_def (rtx insn, enum br_predictor predictor,
579		  enum prediction taken)
580{
581   int probability = predictor_info[(int) predictor].hitrate;
582
583   if (taken != TAKEN)
584     probability = REG_BR_PROB_BASE - probability;
585
586   predict_insn (insn, predictor, probability);
587}
588
589/* Predict edge E with given probability if possible.  */
590
591void
592rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
593{
594  rtx last_insn;
595  last_insn = BB_END (e->src);
596
597  /* We can store the branch prediction information only about
598     conditional jumps.  */
599  if (!any_condjump_p (last_insn))
600    return;
601
602  /* We always store probability of branching.  */
603  if (e->flags & EDGE_FALLTHRU)
604    probability = REG_BR_PROB_BASE - probability;
605
606  predict_insn (last_insn, predictor, probability);
607}
608
609/* Predict edge E with the given PROBABILITY.  */
610void
611gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
612{
613  gcc_assert (profile_status_for_fn (cfun) != PROFILE_GUESSED);
614  if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun) && EDGE_COUNT (e->src->succs) >
615       1)
616      && flag_guess_branch_prob && optimize)
617    {
618      struct edge_prediction *i = XNEW (struct edge_prediction);
619      void **preds = pointer_map_insert (bb_predictions, e->src);
620
621      i->ep_next = (struct edge_prediction *) *preds;
622      *preds = i;
623      i->ep_probability = probability;
624      i->ep_predictor = predictor;
625      i->ep_edge = e;
626    }
627}
628
629/* Remove all predictions on given basic block that are attached
630   to edge E.  */
631void
632remove_predictions_associated_with_edge (edge e)
633{
634  void **preds;
635
636  if (!bb_predictions)
637    return;
638
639  preds = pointer_map_contains (bb_predictions, e->src);
640
641  if (preds)
642    {
643      struct edge_prediction **prediction = (struct edge_prediction **) preds;
644      struct edge_prediction *next;
645
646      while (*prediction)
647	{
648	  if ((*prediction)->ep_edge == e)
649	    {
650	      next = (*prediction)->ep_next;
651	      free (*prediction);
652	      *prediction = next;
653	    }
654	  else
655	    prediction = &((*prediction)->ep_next);
656	}
657    }
658}
659
660/* Clears the list of predictions stored for BB.  */
661
662static void
663clear_bb_predictions (basic_block bb)
664{
665  void **preds = pointer_map_contains (bb_predictions, bb);
666  struct edge_prediction *pred, *next;
667
668  if (!preds)
669    return;
670
671  for (pred = (struct edge_prediction *) *preds; pred; pred = next)
672    {
673      next = pred->ep_next;
674      free (pred);
675    }
676  *preds = NULL;
677}
678
679/* Return true when we can store prediction on insn INSN.
680   At the moment we represent predictions only on conditional
681   jumps, not at computed jump or other complicated cases.  */
682static bool
683can_predict_insn_p (const_rtx insn)
684{
685  return (JUMP_P (insn)
686	  && any_condjump_p (insn)
687	  && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
688}
689
690/* Predict edge E by given predictor if possible.  */
691
692void
693predict_edge_def (edge e, enum br_predictor predictor,
694		  enum prediction taken)
695{
696   int probability = predictor_info[(int) predictor].hitrate;
697
698   if (taken != TAKEN)
699     probability = REG_BR_PROB_BASE - probability;
700
701   predict_edge (e, predictor, probability);
702}
703
704/* Invert all branch predictions or probability notes in the INSN.  This needs
705   to be done each time we invert the condition used by the jump.  */
706
707void
708invert_br_probabilities (rtx insn)
709{
710  rtx note;
711
712  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
713    if (REG_NOTE_KIND (note) == REG_BR_PROB)
714      XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
715    else if (REG_NOTE_KIND (note) == REG_BR_PRED)
716      XEXP (XEXP (note, 0), 1)
717	= GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
718}
719
720/* Dump information about the branch prediction to the output file.  */
721
722static void
723dump_prediction (FILE *file, enum br_predictor predictor, int probability,
724		 basic_block bb, int used)
725{
726  edge e;
727  edge_iterator ei;
728
729  if (!file)
730    return;
731
732  FOR_EACH_EDGE (e, ei, bb->succs)
733    if (! (e->flags & EDGE_FALLTHRU))
734      break;
735
736  fprintf (file, "  %s heuristics%s: %.1f%%",
737	   predictor_info[predictor].name,
738	   used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
739
740  if (bb->count)
741    {
742      fprintf (file, "  exec ");
743      fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
744      if (e)
745	{
746	  fprintf (file, " hit ");
747	  fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count);
748	  fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
749	}
750    }
751
752  fprintf (file, "\n");
753}
754
755/* We can not predict the probabilities of outgoing edges of bb.  Set them
756   evenly and hope for the best.  */
757static void
758set_even_probabilities (basic_block bb)
759{
760  int nedges = 0;
761  edge e;
762  edge_iterator ei;
763
764  FOR_EACH_EDGE (e, ei, bb->succs)
765    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
766      nedges ++;
767  FOR_EACH_EDGE (e, ei, bb->succs)
768    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
769      e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
770    else
771      e->probability = 0;
772}
773
774/* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
775   note if not already present.  Remove now useless REG_BR_PRED notes.  */
776
777static void
778combine_predictions_for_insn (rtx insn, basic_block bb)
779{
780  rtx prob_note;
781  rtx *pnote;
782  rtx note;
783  int best_probability = PROB_EVEN;
784  enum br_predictor best_predictor = END_PREDICTORS;
785  int combined_probability = REG_BR_PROB_BASE / 2;
786  int d;
787  bool first_match = false;
788  bool found = false;
789
790  if (!can_predict_insn_p (insn))
791    {
792      set_even_probabilities (bb);
793      return;
794    }
795
796  prob_note = find_reg_note (insn, REG_BR_PROB, 0);
797  pnote = &REG_NOTES (insn);
798  if (dump_file)
799    fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
800	     bb->index);
801
802  /* We implement "first match" heuristics and use probability guessed
803     by predictor with smallest index.  */
804  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
805    if (REG_NOTE_KIND (note) == REG_BR_PRED)
806      {
807	enum br_predictor predictor = ((enum br_predictor)
808				       INTVAL (XEXP (XEXP (note, 0), 0)));
809	int probability = INTVAL (XEXP (XEXP (note, 0), 1));
810
811	found = true;
812	if (best_predictor > predictor)
813	  best_probability = probability, best_predictor = predictor;
814
815	d = (combined_probability * probability
816	     + (REG_BR_PROB_BASE - combined_probability)
817	     * (REG_BR_PROB_BASE - probability));
818
819	/* Use FP math to avoid overflows of 32bit integers.  */
820	if (d == 0)
821	  /* If one probability is 0% and one 100%, avoid division by zero.  */
822	  combined_probability = REG_BR_PROB_BASE / 2;
823	else
824	  combined_probability = (((double) combined_probability) * probability
825				  * REG_BR_PROB_BASE / d + 0.5);
826      }
827
828  /* Decide which heuristic to use.  In case we didn't match anything,
829     use no_prediction heuristic, in case we did match, use either
830     first match or Dempster-Shaffer theory depending on the flags.  */
831
832  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
833    first_match = true;
834
835  if (!found)
836    dump_prediction (dump_file, PRED_NO_PREDICTION,
837		     combined_probability, bb, true);
838  else
839    {
840      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
841		       bb, !first_match);
842      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
843		       bb, first_match);
844    }
845
846  if (first_match)
847    combined_probability = best_probability;
848  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
849
850  while (*pnote)
851    {
852      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
853	{
854	  enum br_predictor predictor = ((enum br_predictor)
855					 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
856	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
857
858	  dump_prediction (dump_file, predictor, probability, bb,
859			   !first_match || best_predictor == predictor);
860	  *pnote = XEXP (*pnote, 1);
861	}
862      else
863	pnote = &XEXP (*pnote, 1);
864    }
865
866  if (!prob_note)
867    {
868      add_int_reg_note (insn, REG_BR_PROB, combined_probability);
869
870      /* Save the prediction into CFG in case we are seeing non-degenerated
871	 conditional jump.  */
872      if (!single_succ_p (bb))
873	{
874	  BRANCH_EDGE (bb)->probability = combined_probability;
875	  FALLTHRU_EDGE (bb)->probability
876	    = REG_BR_PROB_BASE - combined_probability;
877	}
878    }
879  else if (!single_succ_p (bb))
880    {
881      int prob = XINT (prob_note, 0);
882
883      BRANCH_EDGE (bb)->probability = prob;
884      FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
885    }
886  else
887    single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
888}
889
890/* Combine predictions into single probability and store them into CFG.
891   Remove now useless prediction entries.  */
892
893static void
894combine_predictions_for_bb (basic_block bb)
895{
896  int best_probability = PROB_EVEN;
897  enum br_predictor best_predictor = END_PREDICTORS;
898  int combined_probability = REG_BR_PROB_BASE / 2;
899  int d;
900  bool first_match = false;
901  bool found = false;
902  struct edge_prediction *pred;
903  int nedges = 0;
904  edge e, first = NULL, second = NULL;
905  edge_iterator ei;
906  void **preds;
907
908  FOR_EACH_EDGE (e, ei, bb->succs)
909    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
910      {
911	nedges ++;
912	if (first && !second)
913	  second = e;
914	if (!first)
915	  first = e;
916      }
917
918  /* When there is no successor or only one choice, prediction is easy.
919
920     We are lazy for now and predict only basic blocks with two outgoing
921     edges.  It is possible to predict generic case too, but we have to
922     ignore first match heuristics and do more involved combining.  Implement
923     this later.  */
924  if (nedges != 2)
925    {
926      if (!bb->count)
927	set_even_probabilities (bb);
928      clear_bb_predictions (bb);
929      if (dump_file)
930	fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
931		 nedges, bb->index);
932      return;
933    }
934
935  if (dump_file)
936    fprintf (dump_file, "Predictions for bb %i\n", bb->index);
937
938  preds = pointer_map_contains (bb_predictions, bb);
939  if (preds)
940    {
941      /* We implement "first match" heuristics and use probability guessed
942	 by predictor with smallest index.  */
943      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
944	{
945	  enum br_predictor predictor = pred->ep_predictor;
946	  int probability = pred->ep_probability;
947
948	  if (pred->ep_edge != first)
949	    probability = REG_BR_PROB_BASE - probability;
950
951	  found = true;
952	  /* First match heuristics would be widly confused if we predicted
953	     both directions.  */
954	  if (best_predictor > predictor)
955	    {
956              struct edge_prediction *pred2;
957	      int prob = probability;
958
959	      for (pred2 = (struct edge_prediction *) *preds;
960		   pred2; pred2 = pred2->ep_next)
961	       if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
962	         {
963	           int probability2 = pred->ep_probability;
964
965		   if (pred2->ep_edge != first)
966		     probability2 = REG_BR_PROB_BASE - probability2;
967
968		   if ((probability < REG_BR_PROB_BASE / 2) !=
969		       (probability2 < REG_BR_PROB_BASE / 2))
970		     break;
971
972		   /* If the same predictor later gave better result, go for it! */
973		   if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
974		       || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
975		     prob = probability2;
976		 }
977	      if (!pred2)
978	        best_probability = prob, best_predictor = predictor;
979	    }
980
981	  d = (combined_probability * probability
982	       + (REG_BR_PROB_BASE - combined_probability)
983	       * (REG_BR_PROB_BASE - probability));
984
985	  /* Use FP math to avoid overflows of 32bit integers.  */
986	  if (d == 0)
987	    /* If one probability is 0% and one 100%, avoid division by zero.  */
988	    combined_probability = REG_BR_PROB_BASE / 2;
989	  else
990	    combined_probability = (((double) combined_probability)
991				    * probability
992		    		    * REG_BR_PROB_BASE / d + 0.5);
993	}
994    }
995
996  /* Decide which heuristic to use.  In case we didn't match anything,
997     use no_prediction heuristic, in case we did match, use either
998     first match or Dempster-Shaffer theory depending on the flags.  */
999
1000  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
1001    first_match = true;
1002
1003  if (!found)
1004    dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
1005  else
1006    {
1007      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
1008		       !first_match);
1009      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
1010		       first_match);
1011    }
1012
1013  if (first_match)
1014    combined_probability = best_probability;
1015  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
1016
1017  if (preds)
1018    {
1019      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
1020	{
1021	  enum br_predictor predictor = pred->ep_predictor;
1022	  int probability = pred->ep_probability;
1023
1024	  if (pred->ep_edge != EDGE_SUCC (bb, 0))
1025	    probability = REG_BR_PROB_BASE - probability;
1026	  dump_prediction (dump_file, predictor, probability, bb,
1027			   !first_match || best_predictor == predictor);
1028	}
1029    }
1030  clear_bb_predictions (bb);
1031
1032  if (!bb->count)
1033    {
1034      first->probability = combined_probability;
1035      second->probability = REG_BR_PROB_BASE - combined_probability;
1036    }
1037}
1038
1039/* Check if T1 and T2 satisfy the IV_COMPARE condition.
1040   Return the SSA_NAME if the condition satisfies, NULL otherwise.
1041
1042   T1 and T2 should be one of the following cases:
1043     1. T1 is SSA_NAME, T2 is NULL
1044     2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1045     3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */
1046
1047static tree
1048strips_small_constant (tree t1, tree t2)
1049{
1050  tree ret = NULL;
1051  int value = 0;
1052
1053  if (!t1)
1054    return NULL;
1055  else if (TREE_CODE (t1) == SSA_NAME)
1056    ret = t1;
1057  else if (tree_fits_shwi_p (t1))
1058    value = tree_to_shwi (t1);
1059  else
1060    return NULL;
1061
1062  if (!t2)
1063    return ret;
1064  else if (tree_fits_shwi_p (t2))
1065    value = tree_to_shwi (t2);
1066  else if (TREE_CODE (t2) == SSA_NAME)
1067    {
1068      if (ret)
1069        return NULL;
1070      else
1071        ret = t2;
1072    }
1073
1074  if (value <= 4 && value >= -4)
1075    return ret;
1076  else
1077    return NULL;
1078}
1079
1080/* Return the SSA_NAME in T or T's operands.
1081   Return NULL if SSA_NAME cannot be found.  */
1082
1083static tree
1084get_base_value (tree t)
1085{
1086  if (TREE_CODE (t) == SSA_NAME)
1087    return t;
1088
1089  if (!BINARY_CLASS_P (t))
1090    return NULL;
1091
1092  switch (TREE_OPERAND_LENGTH (t))
1093    {
1094    case 1:
1095      return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1096    case 2:
1097      return strips_small_constant (TREE_OPERAND (t, 0),
1098				    TREE_OPERAND (t, 1));
1099    default:
1100      return NULL;
1101    }
1102}
1103
1104/* Check the compare STMT in LOOP. If it compares an induction
1105   variable to a loop invariant, return true, and save
1106   LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
1107   Otherwise return false and set LOOP_INVAIANT to NULL.  */
1108
1109static bool
1110is_comparison_with_loop_invariant_p (gimple stmt, struct loop *loop,
1111				     tree *loop_invariant,
1112				     enum tree_code *compare_code,
1113				     tree *loop_step,
1114				     tree *loop_iv_base)
1115{
1116  tree op0, op1, bound, base;
1117  affine_iv iv0, iv1;
1118  enum tree_code code;
1119  tree step;
1120
1121  code = gimple_cond_code (stmt);
1122  *loop_invariant = NULL;
1123
1124  switch (code)
1125    {
1126    case GT_EXPR:
1127    case GE_EXPR:
1128    case NE_EXPR:
1129    case LT_EXPR:
1130    case LE_EXPR:
1131    case EQ_EXPR:
1132      break;
1133
1134    default:
1135      return false;
1136    }
1137
1138  op0 = gimple_cond_lhs (stmt);
1139  op1 = gimple_cond_rhs (stmt);
1140
1141  if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1142       || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1143    return false;
1144  if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1145    return false;
1146  if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1147    return false;
1148  if (TREE_CODE (iv0.step) != INTEGER_CST
1149      || TREE_CODE (iv1.step) != INTEGER_CST)
1150    return false;
1151  if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1152      || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1153    return false;
1154
1155  if (integer_zerop (iv0.step))
1156    {
1157      if (code != NE_EXPR && code != EQ_EXPR)
1158	code = invert_tree_comparison (code, false);
1159      bound = iv0.base;
1160      base = iv1.base;
1161      if (tree_fits_shwi_p (iv1.step))
1162	step = iv1.step;
1163      else
1164	return false;
1165    }
1166  else
1167    {
1168      bound = iv1.base;
1169      base = iv0.base;
1170      if (tree_fits_shwi_p (iv0.step))
1171	step = iv0.step;
1172      else
1173	return false;
1174    }
1175
1176  if (TREE_CODE (bound) != INTEGER_CST)
1177    bound = get_base_value (bound);
1178  if (!bound)
1179    return false;
1180  if (TREE_CODE (base) != INTEGER_CST)
1181    base = get_base_value (base);
1182  if (!base)
1183    return false;
1184
1185  *loop_invariant = bound;
1186  *compare_code = code;
1187  *loop_step = step;
1188  *loop_iv_base = base;
1189  return true;
1190}
1191
1192/* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */
1193
1194static bool
1195expr_coherent_p (tree t1, tree t2)
1196{
1197  gimple stmt;
1198  tree ssa_name_1 = NULL;
1199  tree ssa_name_2 = NULL;
1200
1201  gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1202  gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1203
1204  if (t1 == t2)
1205    return true;
1206
1207  if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1208    return true;
1209  if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1210    return false;
1211
1212  /* Check to see if t1 is expressed/defined with t2.  */
1213  stmt = SSA_NAME_DEF_STMT (t1);
1214  gcc_assert (stmt != NULL);
1215  if (is_gimple_assign (stmt))
1216    {
1217      ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1218      if (ssa_name_1 && ssa_name_1 == t2)
1219	return true;
1220    }
1221
1222  /* Check to see if t2 is expressed/defined with t1.  */
1223  stmt = SSA_NAME_DEF_STMT (t2);
1224  gcc_assert (stmt != NULL);
1225  if (is_gimple_assign (stmt))
1226    {
1227      ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1228      if (ssa_name_2 && ssa_name_2 == t1)
1229	return true;
1230    }
1231
1232  /* Compare if t1 and t2's def_stmts are identical.  */
1233  if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1234    return true;
1235  else
1236    return false;
1237}
1238
1239/* Predict branch probability of BB when BB contains a branch that compares
1240   an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1241   loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1242
1243   E.g.
1244     for (int i = 0; i < bound; i++) {
1245       if (i < bound - 2)
1246	 computation_1();
1247       else
1248	 computation_2();
1249     }
1250
1251  In this loop, we will predict the branch inside the loop to be taken.  */
1252
1253static void
1254predict_iv_comparison (struct loop *loop, basic_block bb,
1255		       tree loop_bound_var,
1256		       tree loop_iv_base_var,
1257		       enum tree_code loop_bound_code,
1258		       int loop_bound_step)
1259{
1260  gimple stmt;
1261  tree compare_var, compare_base;
1262  enum tree_code compare_code;
1263  tree compare_step_var;
1264  edge then_edge;
1265  edge_iterator ei;
1266
1267  if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1268      || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
1269      || predicted_by_p (bb, PRED_LOOP_EXIT))
1270    return;
1271
1272  stmt = last_stmt (bb);
1273  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1274    return;
1275  if (!is_comparison_with_loop_invariant_p (stmt, loop, &compare_var,
1276					    &compare_code,
1277					    &compare_step_var,
1278					    &compare_base))
1279    return;
1280
1281  /* Find the taken edge.  */
1282  FOR_EACH_EDGE (then_edge, ei, bb->succs)
1283    if (then_edge->flags & EDGE_TRUE_VALUE)
1284      break;
1285
1286  /* When comparing an IV to a loop invariant, NE is more likely to be
1287     taken while EQ is more likely to be not-taken.  */
1288  if (compare_code == NE_EXPR)
1289    {
1290      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1291      return;
1292    }
1293  else if (compare_code == EQ_EXPR)
1294    {
1295      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1296      return;
1297    }
1298
1299  if (!expr_coherent_p (loop_iv_base_var, compare_base))
1300    return;
1301
1302  /* If loop bound, base and compare bound are all constants, we can
1303     calculate the probability directly.  */
1304  if (tree_fits_shwi_p (loop_bound_var)
1305      && tree_fits_shwi_p (compare_var)
1306      && tree_fits_shwi_p (compare_base))
1307    {
1308      int probability;
1309      bool of, overflow = false;
1310      double_int mod, compare_count, tem, loop_count;
1311
1312      double_int loop_bound = tree_to_double_int (loop_bound_var);
1313      double_int compare_bound = tree_to_double_int (compare_var);
1314      double_int base = tree_to_double_int (compare_base);
1315      double_int compare_step = tree_to_double_int (compare_step_var);
1316
1317      /* (loop_bound - base) / compare_step */
1318      tem = loop_bound.sub_with_overflow (base, &of);
1319      overflow |= of;
1320      loop_count = tem.divmod_with_overflow (compare_step,
1321					      0, TRUNC_DIV_EXPR,
1322					      &mod, &of);
1323      overflow |= of;
1324
1325      if ((!compare_step.is_negative ())
1326          ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1327	{
1328	  /* (loop_bound - compare_bound) / compare_step */
1329	  tem = loop_bound.sub_with_overflow (compare_bound, &of);
1330	  overflow |= of;
1331	  compare_count = tem.divmod_with_overflow (compare_step,
1332						     0, TRUNC_DIV_EXPR,
1333						     &mod, &of);
1334	  overflow |= of;
1335	}
1336      else
1337        {
1338	  /* (compare_bound - base) / compare_step */
1339	  tem = compare_bound.sub_with_overflow (base, &of);
1340	  overflow |= of;
1341          compare_count = tem.divmod_with_overflow (compare_step,
1342						     0, TRUNC_DIV_EXPR,
1343						     &mod, &of);
1344	  overflow |= of;
1345	}
1346      if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1347	++compare_count;
1348      if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1349	++loop_count;
1350      if (compare_count.is_negative ())
1351        compare_count = double_int_zero;
1352      if (loop_count.is_negative ())
1353        loop_count = double_int_zero;
1354      if (loop_count.is_zero ())
1355	probability = 0;
1356      else if (compare_count.scmp (loop_count) == 1)
1357	probability = REG_BR_PROB_BASE;
1358      else
1359        {
1360	  /* If loop_count is too big, such that REG_BR_PROB_BASE * loop_count
1361	     could overflow, shift both loop_count and compare_count right
1362	     a bit so that it doesn't overflow.  Note both counts are known not
1363	     to be negative at this point.  */
1364	  int clz_bits = clz_hwi (loop_count.high);
1365	  gcc_assert (REG_BR_PROB_BASE < 32768);
1366	  if (clz_bits < 16)
1367	    {
1368	      loop_count.arshift (16 - clz_bits, HOST_BITS_PER_DOUBLE_INT);
1369	      compare_count.arshift (16 - clz_bits, HOST_BITS_PER_DOUBLE_INT);
1370	    }
1371	  tem = compare_count.mul_with_sign (double_int::from_shwi
1372					    (REG_BR_PROB_BASE), true, &of);
1373	  gcc_assert (!of);
1374	  tem = tem.divmod (loop_count, true, TRUNC_DIV_EXPR, &mod);
1375	  probability = tem.to_uhwi ();
1376	}
1377
1378      if (!overflow)
1379        predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1380
1381      return;
1382    }
1383
1384  if (expr_coherent_p (loop_bound_var, compare_var))
1385    {
1386      if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1387	  && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1388	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1389      else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1390	       && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1391	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1392      else if (loop_bound_code == NE_EXPR)
1393	{
1394	  /* If the loop backedge condition is "(i != bound)", we do
1395	     the comparison based on the step of IV:
1396	     * step < 0 : backedge condition is like (i > bound)
1397	     * step > 0 : backedge condition is like (i < bound)  */
1398	  gcc_assert (loop_bound_step != 0);
1399	  if (loop_bound_step > 0
1400	      && (compare_code == LT_EXPR
1401		  || compare_code == LE_EXPR))
1402	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1403	  else if (loop_bound_step < 0
1404		   && (compare_code == GT_EXPR
1405		       || compare_code == GE_EXPR))
1406	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1407	  else
1408	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1409	}
1410      else
1411	/* The branch is predicted not-taken if loop_bound_code is
1412	   opposite with compare_code.  */
1413	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1414    }
1415  else if (expr_coherent_p (loop_iv_base_var, compare_var))
1416    {
1417      /* For cases like:
1418	   for (i = s; i < h; i++)
1419	     if (i > s + 2) ....
1420	 The branch should be predicted taken.  */
1421      if (loop_bound_step > 0
1422	  && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1423	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1424      else if (loop_bound_step < 0
1425	       && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1426	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1427      else
1428	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1429    }
1430}
1431
1432/* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
1433   exits are resulted from short-circuit conditions that will generate an
1434   if_tmp. E.g.:
1435
1436   if (foo() || global > 10)
1437     break;
1438
1439   This will be translated into:
1440
1441   BB3:
1442     loop header...
1443   BB4:
1444     if foo() goto BB6 else goto BB5
1445   BB5:
1446     if global > 10 goto BB6 else goto BB7
1447   BB6:
1448     goto BB7
1449   BB7:
1450     iftmp = (PHI 0(BB5), 1(BB6))
1451     if iftmp == 1 goto BB8 else goto BB3
1452   BB8:
1453     outside of the loop...
1454
1455   The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
1456   From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1457   exits. This function takes BB7->BB8 as input, and finds out the extra loop
1458   exits to predict them using PRED_LOOP_EXIT.  */
1459
1460static void
1461predict_extra_loop_exits (edge exit_edge)
1462{
1463  unsigned i;
1464  bool check_value_one;
1465  gimple phi_stmt;
1466  tree cmp_rhs, cmp_lhs;
1467  gimple cmp_stmt = last_stmt (exit_edge->src);
1468
1469  if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1470    return;
1471  cmp_rhs = gimple_cond_rhs (cmp_stmt);
1472  cmp_lhs = gimple_cond_lhs (cmp_stmt);
1473  if (!TREE_CONSTANT (cmp_rhs)
1474      || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1475    return;
1476  if (TREE_CODE (cmp_lhs) != SSA_NAME)
1477    return;
1478
1479  /* If check_value_one is true, only the phi_args with value '1' will lead
1480     to loop exit. Otherwise, only the phi_args with value '0' will lead to
1481     loop exit.  */
1482  check_value_one = (((integer_onep (cmp_rhs))
1483		    ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1484		    ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1485
1486  phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1487  if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI)
1488    return;
1489
1490  for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1491    {
1492      edge e1;
1493      edge_iterator ei;
1494      tree val = gimple_phi_arg_def (phi_stmt, i);
1495      edge e = gimple_phi_arg_edge (phi_stmt, i);
1496
1497      if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1498	continue;
1499      if ((check_value_one ^ integer_onep (val)) == 1)
1500	continue;
1501      if (EDGE_COUNT (e->src->succs) != 1)
1502	{
1503	  predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
1504	  continue;
1505	}
1506
1507      FOR_EACH_EDGE (e1, ei, e->src->preds)
1508	predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
1509    }
1510}
1511
1512/* Predict edge probabilities by exploiting loop structure.  */
1513
1514static void
1515predict_loops (void)
1516{
1517  struct loop *loop;
1518
1519  /* Try to predict out blocks in a loop that are not part of a
1520     natural loop.  */
1521  FOR_EACH_LOOP (loop, 0)
1522    {
1523      basic_block bb, *bbs;
1524      unsigned j, n_exits;
1525      vec<edge> exits;
1526      struct tree_niter_desc niter_desc;
1527      edge ex;
1528      struct nb_iter_bound *nb_iter;
1529      enum tree_code loop_bound_code = ERROR_MARK;
1530      tree loop_bound_step = NULL;
1531      tree loop_bound_var = NULL;
1532      tree loop_iv_base = NULL;
1533      gimple stmt = NULL;
1534
1535      exits = get_loop_exit_edges (loop);
1536      n_exits = exits.length ();
1537      if (!n_exits)
1538	{
1539          exits.release ();
1540	  continue;
1541	}
1542
1543      FOR_EACH_VEC_ELT (exits, j, ex)
1544	{
1545	  tree niter = NULL;
1546	  HOST_WIDE_INT nitercst;
1547	  int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1548	  int probability;
1549	  enum br_predictor predictor;
1550
1551	  predict_extra_loop_exits (ex);
1552
1553	  if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1554	    niter = niter_desc.niter;
1555	  if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1556	    niter = loop_niter_by_eval (loop, ex);
1557
1558	  if (TREE_CODE (niter) == INTEGER_CST)
1559	    {
1560	      if (tree_fits_uhwi_p (niter)
1561		  && max
1562		  && compare_tree_int (niter, max - 1) == -1)
1563		nitercst = tree_to_uhwi (niter) + 1;
1564	      else
1565		nitercst = max;
1566	      predictor = PRED_LOOP_ITERATIONS;
1567	    }
1568	  /* If we have just one exit and we can derive some information about
1569	     the number of iterations of the loop from the statements inside
1570	     the loop, use it to predict this exit.  */
1571	  else if (n_exits == 1)
1572	    {
1573	      nitercst = estimated_stmt_executions_int (loop);
1574	      if (nitercst < 0)
1575		continue;
1576	      if (nitercst > max)
1577		nitercst = max;
1578
1579	      predictor = PRED_LOOP_ITERATIONS_GUESSED;
1580	    }
1581	  else
1582	    continue;
1583
1584	  /* If the prediction for number of iterations is zero, do not
1585	     predict the exit edges.  */
1586	  if (nitercst == 0)
1587	    continue;
1588
1589	  probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
1590	  predict_edge (ex, predictor, probability);
1591	}
1592      exits.release ();
1593
1594      /* Find information about loop bound variables.  */
1595      for (nb_iter = loop->bounds; nb_iter;
1596	   nb_iter = nb_iter->next)
1597	if (nb_iter->stmt
1598	    && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1599	  {
1600	    stmt = nb_iter->stmt;
1601	    break;
1602	  }
1603      if (!stmt && last_stmt (loop->header)
1604	  && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1605	stmt = last_stmt (loop->header);
1606      if (stmt)
1607	is_comparison_with_loop_invariant_p (stmt, loop,
1608					     &loop_bound_var,
1609					     &loop_bound_code,
1610					     &loop_bound_step,
1611					     &loop_iv_base);
1612
1613      bbs = get_loop_body (loop);
1614
1615      for (j = 0; j < loop->num_nodes; j++)
1616	{
1617	  int header_found = 0;
1618	  edge e;
1619	  edge_iterator ei;
1620
1621	  bb = bbs[j];
1622
1623	  /* Bypass loop heuristics on continue statement.  These
1624	     statements construct loops via "non-loop" constructs
1625	     in the source language and are better to be handled
1626	     separately.  */
1627	  if (predicted_by_p (bb, PRED_CONTINUE))
1628	    continue;
1629
1630	  /* Loop branch heuristics - predict an edge back to a
1631	     loop's head as taken.  */
1632	  if (bb == loop->latch)
1633	    {
1634	      e = find_edge (loop->latch, loop->header);
1635	      if (e)
1636		{
1637		  header_found = 1;
1638		  predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
1639		}
1640	    }
1641
1642	  /* Loop exit heuristics - predict an edge exiting the loop if the
1643	     conditional has no loop header successors as not taken.  */
1644	  if (!header_found
1645	      /* If we already used more reliable loop exit predictors, do not
1646		 bother with PRED_LOOP_EXIT.  */
1647	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1648	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
1649	    {
1650	      /* For loop with many exits we don't want to predict all exits
1651	         with the pretty large probability, because if all exits are
1652		 considered in row, the loop would be predicted to iterate
1653		 almost never.  The code to divide probability by number of
1654		 exits is very rough.  It should compute the number of exits
1655		 taken in each patch through function (not the overall number
1656		 of exits that might be a lot higher for loops with wide switch
1657		 statements in them) and compute n-th square root.
1658
1659		 We limit the minimal probability by 2% to avoid
1660		 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
1661		 as this was causing regression in perl benchmark containing such
1662		 a wide loop.  */
1663
1664	      int probability = ((REG_BR_PROB_BASE
1665		                  - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1666				 / n_exits);
1667	      if (probability < HITRATE (2))
1668		probability = HITRATE (2);
1669	      FOR_EACH_EDGE (e, ei, bb->succs)
1670		if (e->dest->index < NUM_FIXED_BLOCKS
1671		    || !flow_bb_inside_loop_p (loop, e->dest))
1672		  predict_edge (e, PRED_LOOP_EXIT, probability);
1673	    }
1674	  if (loop_bound_var)
1675	    predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1676				   loop_bound_code,
1677				   tree_to_shwi (loop_bound_step));
1678	}
1679
1680      /* Free basic blocks from get_loop_body.  */
1681      free (bbs);
1682    }
1683}
1684
1685/* Attempt to predict probabilities of BB outgoing edges using local
1686   properties.  */
1687static void
1688bb_estimate_probability_locally (basic_block bb)
1689{
1690  rtx last_insn = BB_END (bb);
1691  rtx cond;
1692
1693  if (! can_predict_insn_p (last_insn))
1694    return;
1695  cond = get_condition (last_insn, NULL, false, false);
1696  if (! cond)
1697    return;
1698
1699  /* Try "pointer heuristic."
1700     A comparison ptr == 0 is predicted as false.
1701     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
1702  if (COMPARISON_P (cond)
1703      && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1704	  || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1705    {
1706      if (GET_CODE (cond) == EQ)
1707	predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1708      else if (GET_CODE (cond) == NE)
1709	predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1710    }
1711  else
1712
1713  /* Try "opcode heuristic."
1714     EQ tests are usually false and NE tests are usually true. Also,
1715     most quantities are positive, so we can make the appropriate guesses
1716     about signed comparisons against zero.  */
1717    switch (GET_CODE (cond))
1718      {
1719      case CONST_INT:
1720	/* Unconditional branch.  */
1721	predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1722			  cond == const0_rtx ? NOT_TAKEN : TAKEN);
1723	break;
1724
1725      case EQ:
1726      case UNEQ:
1727	/* Floating point comparisons appears to behave in a very
1728	   unpredictable way because of special role of = tests in
1729	   FP code.  */
1730	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1731	  ;
1732	/* Comparisons with 0 are often used for booleans and there is
1733	   nothing useful to predict about them.  */
1734	else if (XEXP (cond, 1) == const0_rtx
1735		 || XEXP (cond, 0) == const0_rtx)
1736	  ;
1737	else
1738	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1739	break;
1740
1741      case NE:
1742      case LTGT:
1743	/* Floating point comparisons appears to behave in a very
1744	   unpredictable way because of special role of = tests in
1745	   FP code.  */
1746	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1747	  ;
1748	/* Comparisons with 0 are often used for booleans and there is
1749	   nothing useful to predict about them.  */
1750	else if (XEXP (cond, 1) == const0_rtx
1751		 || XEXP (cond, 0) == const0_rtx)
1752	  ;
1753	else
1754	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1755	break;
1756
1757      case ORDERED:
1758	predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1759	break;
1760
1761      case UNORDERED:
1762	predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1763	break;
1764
1765      case LE:
1766      case LT:
1767	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1768	    || XEXP (cond, 1) == constm1_rtx)
1769	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1770	break;
1771
1772      case GE:
1773      case GT:
1774	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1775	    || XEXP (cond, 1) == constm1_rtx)
1776	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1777	break;
1778
1779      default:
1780	break;
1781      }
1782}
1783
1784/* Set edge->probability for each successor edge of BB.  */
1785void
1786guess_outgoing_edge_probabilities (basic_block bb)
1787{
1788  bb_estimate_probability_locally (bb);
1789  combine_predictions_for_insn (BB_END (bb), bb);
1790}
1791
1792static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);
1793
1794/* Helper function for expr_expected_value.  */
1795
1796static tree
1797expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1798		       tree op1, bitmap visited, enum br_predictor *predictor)
1799{
1800  gimple def;
1801
1802  if (predictor)
1803    *predictor = PRED_UNCONDITIONAL;
1804
1805  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1806    {
1807      if (TREE_CONSTANT (op0))
1808	return op0;
1809
1810      if (code != SSA_NAME)
1811	return NULL_TREE;
1812
1813      def = SSA_NAME_DEF_STMT (op0);
1814
1815      /* If we were already here, break the infinite cycle.  */
1816      if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1817	return NULL;
1818
1819      if (gimple_code (def) == GIMPLE_PHI)
1820	{
	  /* All the arguments of the PHI node must have the same expected
	     constant value.  */
1823	  int i, n = gimple_phi_num_args (def);
1824	  tree val = NULL, new_val;
1825
1826	  for (i = 0; i < n; i++)
1827	    {
1828	      tree arg = PHI_ARG_DEF (def, i);
1829	      enum br_predictor predictor2;
1830
	      /* If this PHI has itself as an argument, we cannot
		 determine the expected value of this argument.  However,
		 if we can find an expected constant value for the other
		 PHI args then we can still be sure that this is
		 likely a constant.  So be optimistic and just
		 continue with the next argument.  */
1837	      if (arg == PHI_RESULT (def))
1838		continue;
1839
1840	      new_val = expr_expected_value (arg, visited, &predictor2);
1841
	      /* It is difficult to combine value predictors.  Simply assume
		 that the later predictor is weaker and take its
		 prediction.  */
1844	      if (predictor && *predictor < predictor2)
1845		*predictor = predictor2;
1846	      if (!new_val)
1847		return NULL;
1848	      if (!val)
1849		val = new_val;
1850	      else if (!operand_equal_p (val, new_val, false))
1851		return NULL;
1852	    }
1853	  return val;
1854	}
1855      if (is_gimple_assign (def))
1856	{
1857	  if (gimple_assign_lhs (def) != op0)
1858	    return NULL;
1859
1860	  return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1861					gimple_assign_rhs1 (def),
1862					gimple_assign_rhs_code (def),
1863					gimple_assign_rhs2 (def),
1864					visited, predictor);
1865	}
1866
1867      if (is_gimple_call (def))
1868	{
1869	  tree decl = gimple_call_fndecl (def);
1870	  if (!decl)
1871	    {
1872	      if (gimple_call_internal_p (def)
1873		  && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
1874		{
1875		  gcc_assert (gimple_call_num_args (def) == 3);
1876		  tree val = gimple_call_arg (def, 0);
1877		  if (TREE_CONSTANT (val))
1878		    return val;
1879		  if (predictor)
1880		    {
1881		      *predictor = PRED_BUILTIN_EXPECT;
1882		      tree val2 = gimple_call_arg (def, 2);
1883		      gcc_assert (TREE_CODE (val2) == INTEGER_CST
1884				  && tree_fits_uhwi_p (val2)
1885				  && tree_to_uhwi (val2) < END_PREDICTORS);
1886		      *predictor = (enum br_predictor) tree_to_uhwi (val2);
1887		    }
1888		  return gimple_call_arg (def, 1);
1889		}
1890	      return NULL;
1891	    }
1892	  if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
1893	    switch (DECL_FUNCTION_CODE (decl))
1894	      {
1895	      case BUILT_IN_EXPECT:
1896		{
1897		  tree val;
1898		  if (gimple_call_num_args (def) != 2)
1899		    return NULL;
1900		  val = gimple_call_arg (def, 0);
1901		  if (TREE_CONSTANT (val))
1902		    return val;
1903		  if (predictor)
1904		    *predictor = PRED_BUILTIN_EXPECT;
1905		  return gimple_call_arg (def, 1);
1906		}
1907
1908	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
1909	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
1910	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
1911	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
1912	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
1913	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
1914	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
1915	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
1916	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
1917	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
1918	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
1919	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
1920	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
1921		/* Assume that any given atomic operation has low contention,
1922		   and thus the compare-and-swap operation succeeds.  */
1923		if (predictor)
1924		  *predictor = PRED_COMPARE_AND_SWAP;
1925		return boolean_true_node;
1926	    }
1927	}
1928
1929      return NULL;
1930    }
1931
1932  if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1933    {
1934      tree res;
1935      enum br_predictor predictor2;
1936      op0 = expr_expected_value (op0, visited, predictor);
1937      if (!op0)
1938	return NULL;
1939      op1 = expr_expected_value (op1, visited, &predictor2);
1940      if (predictor && *predictor < predictor2)
1941	*predictor = predictor2;
1942      if (!op1)
1943	return NULL;
1944      res = fold_build2 (code, type, op0, op1);
1945      if (TREE_CONSTANT (res))
1946	return res;
1947      return NULL;
1948    }
1949  if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1950    {
1951      tree res;
1952      op0 = expr_expected_value (op0, visited, predictor);
1953      if (!op0)
1954	return NULL;
1955      res = fold_build1 (code, type, op0);
1956      if (TREE_CONSTANT (res))
1957	return res;
1958      return NULL;
1959    }
1960  return NULL;
1961}
1962
/* Return the constant value EXPR is likely to have at execution time, or
   NULL if that is unknown.  The function is used by the builtin_expect
   branch predictor, so the evidence must come from that construct and
   from any additional constant folding it enables.

   We may want to implement a more involved value guess (such as prediction
   based on value range propagation), but such tricks should go into a new
   implementation.  */
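/* For instance (a sketch with made-up names), a source-level test like

     if (__builtin_expect (ptr != NULL, 1))
       ...

   ends up testing the result of the __builtin_expect call; the walk
   performed by these routines follows the SSA definition back to that call,
   substitutes the expected value 1, and lets constant folding reduce the
   condition to a nonzero constant, so the branch is predicted taken by
   PRED_BUILTIN_EXPECT.  */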
1970
1971static tree
1972expr_expected_value (tree expr, bitmap visited,
1973		     enum br_predictor *predictor)
1974{
1975  enum tree_code code;
1976  tree op0, op1;
1977
1978  if (TREE_CONSTANT (expr))
1979    {
1980      if (predictor)
1981	*predictor = PRED_UNCONDITIONAL;
1982      return expr;
1983    }
1984
1985  extract_ops_from_tree (expr, &code, &op0, &op1);
1986  return expr_expected_value_1 (TREE_TYPE (expr),
1987				op0, code, op1, visited, predictor);
1988}
1989
1990
1991/* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
1992   we no longer need.  */
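/* For example (illustrative GIMPLE), a hint whose result is still used,

     t_1 = __builtin_expect (cond_2, 1);

   is replaced by the plain copy

     t_1 = cond_2;

   while a hint with no left-hand side is removed entirely.  */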
1993static unsigned int
1994strip_predict_hints (void)
1995{
1996  basic_block bb;
1997  gimple ass_stmt;
1998  tree var;
1999
2000  FOR_EACH_BB_FN (bb, cfun)
2001    {
2002      gimple_stmt_iterator bi;
2003      for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
2004	{
2005	  gimple stmt = gsi_stmt (bi);
2006
2007	  if (gimple_code (stmt) == GIMPLE_PREDICT)
2008	    {
2009	      gsi_remove (&bi, true);
2010	      continue;
2011	    }
2012	  else if (is_gimple_call (stmt))
2013	    {
2014	      tree fndecl = gimple_call_fndecl (stmt);
2015
2016	      if ((fndecl
2017		   && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
2018		   && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
2019		   && gimple_call_num_args (stmt) == 2)
2020		  || (gimple_call_internal_p (stmt)
2021		      && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
2022		{
2023		  var = gimple_call_lhs (stmt);
2024		  if (var)
2025		    {
2026		      ass_stmt
2027			= gimple_build_assign (var, gimple_call_arg (stmt, 0));
2028		      gsi_replace (&bi, ass_stmt, true);
2029		    }
2030		  else
2031		    {
2032		      gsi_remove (&bi, true);
2033		      continue;
2034		    }
2035		}
2036	    }
2037	  gsi_next (&bi);
2038	}
2039    }
2040  return 0;
2041}
2042
/* Predict using the opcode of the last statement in the basic block.  */
2044static void
2045tree_predict_by_opcode (basic_block bb)
2046{
2047  gimple stmt = last_stmt (bb);
2048  edge then_edge;
2049  tree op0, op1;
2050  tree type;
2051  tree val;
2052  enum tree_code cmp;
2053  bitmap visited;
2054  edge_iterator ei;
2055  enum br_predictor predictor;
2056
2057  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
2058    return;
2059  FOR_EACH_EDGE (then_edge, ei, bb->succs)
2060    if (then_edge->flags & EDGE_TRUE_VALUE)
2061      break;
2062  op0 = gimple_cond_lhs (stmt);
2063  op1 = gimple_cond_rhs (stmt);
2064  cmp = gimple_cond_code (stmt);
2065  type = TREE_TYPE (op0);
2066  visited = BITMAP_ALLOC (NULL);
2067  val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,
2068			       &predictor);
2069  BITMAP_FREE (visited);
2070  if (val && TREE_CODE (val) == INTEGER_CST)
2071    {
2072      if (predictor == PRED_BUILTIN_EXPECT)
2073	{
2074	  int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
2075
2076	  gcc_assert (percent >= 0 && percent <= 100);
2077	  if (integer_zerop (val))
2078	    percent = 100 - percent;
2079	  predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
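	  /* As a worked example: with the default value of 90 for
	     --param builtin-expect-probability, __builtin_expect (c, 1)
	     predicts the THEN edge with HITRATE (90), while
	     __builtin_expect (c, 0) predicts it with HITRATE (100 - 90).  */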
2080	}
2081      else
2082	predict_edge (then_edge, predictor,
2083		      integer_zerop (val) ? NOT_TAKEN : TAKEN);
2084    }
2085  /* Try "pointer heuristic."
2086     A comparison ptr == 0 is predicted as false.
2087     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
2088  if (POINTER_TYPE_P (type))
2089    {
2090      if (cmp == EQ_EXPR)
2091	predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
2092      else if (cmp == NE_EXPR)
2093	predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
2094    }
2095  else
2096
2097  /* Try "opcode heuristic."
2098     EQ tests are usually false and NE tests are usually true. Also,
2099     most quantities are positive, so we can make the appropriate guesses
2100     about signed comparisons against zero.  */
2101    switch (cmp)
2102      {
2103      case EQ_EXPR:
2104      case UNEQ_EXPR:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of equality
	   tests in FP code.  */
2108	if (FLOAT_TYPE_P (type))
2109	  ;
2110	/* Comparisons with 0 are often used for booleans and there is
2111	   nothing useful to predict about them.  */
2112	else if (integer_zerop (op0) || integer_zerop (op1))
2113	  ;
2114	else
2115	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2116	break;
2117
2118      case NE_EXPR:
2119      case LTGT_EXPR:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of equality
	   tests in FP code.  */
2123	if (FLOAT_TYPE_P (type))
2124	  ;
2125	/* Comparisons with 0 are often used for booleans and there is
2126	   nothing useful to predict about them.  */
2127	else if (integer_zerop (op0)
2128		 || integer_zerop (op1))
2129	  ;
2130	else
2131	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2132	break;
2133
2134      case ORDERED_EXPR:
2135	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2136	break;
2137
2138      case UNORDERED_EXPR:
2139	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2140	break;
2141
2142      case LE_EXPR:
2143      case LT_EXPR:
2144	if (integer_zerop (op1)
2145	    || integer_onep (op1)
2146	    || integer_all_onesp (op1)
2147	    || real_zerop (op1)
2148	    || real_onep (op1)
2149	    || real_minus_onep (op1))
2150	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2151	break;
2152
2153      case GE_EXPR:
2154      case GT_EXPR:
2155	if (integer_zerop (op1)
2156	    || integer_onep (op1)
2157	    || integer_all_onesp (op1)
2158	    || real_zerop (op1)
2159	    || real_onep (op1)
2160	    || real_minus_onep (op1))
2161	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2162	break;
2163
2164      default:
2165	break;
2166      }
2167}
2168
/* Try to guess whether the return value VAL indicates an error code.  */
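/* For example, "return NULL;" and "return -1;" are treated as unlikely
   error paths (PRED_NULL_RETURN and PRED_NEGATIVE_RETURN, NOT_TAKEN),
   while returning a constant other than zero or one, such as
   "return 42;", is predicted as the commonly taken path
   (PRED_CONST_RETURN).  */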
2170
2171static enum br_predictor
2172return_prediction (tree val, enum prediction *prediction)
2173{
2174  /* VOID.  */
2175  if (!val)
2176    return PRED_NO_PREDICTION;
2177  /* Different heuristics for pointers and scalars.  */
2178  if (POINTER_TYPE_P (TREE_TYPE (val)))
2179    {
2180      /* NULL is usually not returned.  */
2181      if (integer_zerop (val))
2182	{
2183	  *prediction = NOT_TAKEN;
2184	  return PRED_NULL_RETURN;
2185	}
2186    }
2187  else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2188    {
2189      /* Negative return values are often used to indicate
2190         errors.  */
2191      if (TREE_CODE (val) == INTEGER_CST
2192	  && tree_int_cst_sgn (val) < 0)
2193	{
2194	  *prediction = NOT_TAKEN;
2195	  return PRED_NEGATIVE_RETURN;
2196	}
      /* Constant return values seem to be commonly taken.
	 Zero/one often represent booleans, so exclude them from the
	 heuristics.  */
2200      if (TREE_CONSTANT (val)
2201	  && (!integer_zerop (val) && !integer_onep (val)))
2202	{
2203	  *prediction = TAKEN;
2204	  return PRED_CONST_RETURN;
2205	}
2206    }
2207  return PRED_NO_PREDICTION;
2208}
2209
/* Find the basic block with the return expression, look up its possible
   return values and try to apply the RETURN_PREDICTION heuristics.  */
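/* For instance (an illustrative sketch), in a function shaped like

     if (fail)
       return -1;
     ...
     return 0;

   the merged return value is a PHI node whose argument -1 maps to
   PRED_NEGATIVE_RETURN, so the paths leading to that early return are
   predicted as not taken.  */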
2212static void
2213apply_return_prediction (void)
2214{
2215  gimple return_stmt = NULL;
2216  tree return_val;
2217  edge e;
2218  gimple phi;
2219  int phi_num_args, i;
2220  enum br_predictor pred;
2221  enum prediction direction;
2222  edge_iterator ei;
2223
2224  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2225    {
2226      return_stmt = last_stmt (e->src);
2227      if (return_stmt
2228	  && gimple_code (return_stmt) == GIMPLE_RETURN)
2229	break;
2230    }
2231  if (!e)
2232    return;
2233  return_val = gimple_return_retval (return_stmt);
2234  if (!return_val)
2235    return;
2236  if (TREE_CODE (return_val) != SSA_NAME
2237      || !SSA_NAME_DEF_STMT (return_val)
2238      || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2239    return;
2240  phi = SSA_NAME_DEF_STMT (return_val);
2241  phi_num_args = gimple_phi_num_args (phi);
2242  pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2243
  /* Avoid the degenerate case where all return values from the function
     belong to the same category (i.e. they are all positive constants),
     in which case we can hardly say anything useful about them.  */
2247  for (i = 1; i < phi_num_args; i++)
2248    if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2249      break;
2250  if (i != phi_num_args)
2251    for (i = 0; i < phi_num_args; i++)
2252      {
2253	pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2254	if (pred != PRED_NO_PREDICTION)
2255	  predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2256				         direction);
2257      }
2258}
2259
/* Look for basic blocks that contain unlikely-to-happen events
   (such as noreturn calls) and mark all paths leading to execution
   of these basic blocks as unlikely.  */
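/* For example (an illustrative sketch), a guarded noreturn call such as

     if (bad)
       abort ();

   causes all paths leading to the abort () call to be predicted as not
   taken by PRED_NORETURN; a call to a function declared with
   __attribute__ ((cold)) is treated the same way via
   PRED_COLD_FUNCTION.  */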
2263
2264static void
2265tree_bb_level_predictions (void)
2266{
2267  basic_block bb;
2268  bool has_return_edges = false;
2269  edge e;
2270  edge_iterator ei;
2271
2272  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2273    if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2274      {
2275        has_return_edges = true;
2276	break;
2277      }
2278
2279  apply_return_prediction ();
2280
2281  FOR_EACH_BB_FN (bb, cfun)
2282    {
2283      gimple_stmt_iterator gsi;
2284
2285      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2286	{
2287	  gimple stmt = gsi_stmt (gsi);
2288	  tree decl;
2289
2290	  if (is_gimple_call (stmt))
2291	    {
2292	      if ((gimple_call_flags (stmt) & ECF_NORETURN)
2293	          && has_return_edges)
2294		predict_paths_leading_to (bb, PRED_NORETURN,
2295					  NOT_TAKEN);
2296	      decl = gimple_call_fndecl (stmt);
2297	      if (decl
2298		  && lookup_attribute ("cold",
2299				       DECL_ATTRIBUTES (decl)))
2300		predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2301					  NOT_TAKEN);
2302	    }
2303	  else if (gimple_code (stmt) == GIMPLE_PREDICT)
2304	    {
2305	      predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2306					gimple_predict_outcome (stmt));
2307	      /* Keep GIMPLE_PREDICT around so early inlining will propagate
2308	         hints to callers.  */
2309	    }
2310	}
2311    }
2312}
2313
2314#ifdef ENABLE_CHECKING
2315
2316/* Callback for pointer_map_traverse, asserts that the pointer map is
2317   empty.  */
2318
2319static bool
2320assert_is_empty (const void *key ATTRIBUTE_UNUSED, void **value,
2321		 void *data ATTRIBUTE_UNUSED)
2322{
2323  gcc_assert (!*value);
2324  return false;
2325}
2326#endif
2327
2328/* Predict branch probabilities and estimate profile for basic block BB.  */
2329
2330static void
2331tree_estimate_probability_bb (basic_block bb)
2332{
2333  edge e;
2334  edge_iterator ei;
2335  gimple last;
2336
2337  FOR_EACH_EDGE (e, ei, bb->succs)
2338    {
2339      /* Predict edges to user labels with attributes.  */
2340      if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
2341	{
2342	  gimple_stmt_iterator gi;
2343	  for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2344	    {
2345	      gimple stmt = gsi_stmt (gi);
2346	      tree decl;
2347
2348	      if (gimple_code (stmt) != GIMPLE_LABEL)
2349		break;
2350	      decl = gimple_label_label (stmt);
2351	      if (DECL_ARTIFICIAL (decl))
2352		continue;
2353
2354	      /* Finally, we have a user-defined label.  */
2355	      if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2356		predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2357	      else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2358		predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2359	    }
2360	}
2361
      /* Predict early returns to be probable, as we've already taken
	 care of error returns and the remaining cases are often fast
	 paths through the function.
2365
2366	 Since we've already removed the return statements, we are
2367	 looking for CFG like:
2368
2369	 if (conditional)
2370	 {
2371	 ..
2372	 goto return_block
2373	 }
2374	 some other blocks
2375	 return_block:
2376	 return_stmt.  */
2377      if (e->dest != bb->next_bb
2378	  && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2379	  && single_succ_p (e->dest)
2380	  && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2381	  && (last = last_stmt (e->dest)) != NULL
2382	  && gimple_code (last) == GIMPLE_RETURN)
2383	{
2384	  edge e1;
2385	  edge_iterator ei1;
2386
2387	  if (single_succ_p (bb))
2388	    {
2389	      FOR_EACH_EDGE (e1, ei1, bb->preds)
2390		if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2391		    && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2392		    && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2393		  predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2394	    }
2395	  else
2396	    if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2397		&& !predicted_by_p (e->src, PRED_CONST_RETURN)
2398		&& !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2399	      predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2400	}
2401
      /* Look for a block we are guarding (i.e. we dominate it,
	 but it doesn't postdominate us).  */
2404      if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
2405	  && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2406	  && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2407	{
2408	  gimple_stmt_iterator bi;
2409
2410	  /* The call heuristic claims that a guarded function call
2411	     is improbable.  This is because such calls are often used
2412	     to signal exceptional situations such as printing error
2413	     messages.  */
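	  /* E.g. for a guard of the shape

	       if (err)
		 print_error_message ();

	     (PRINT_ERROR_MESSAGE being a made-up name for a call with side
	     effects) the edge into the block containing the call is
	     predicted as not taken.  */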
2414	  for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2415	       gsi_next (&bi))
2416	    {
2417	      gimple stmt = gsi_stmt (bi);
2418	      if (is_gimple_call (stmt)
		  /* Const and pure calls are hardly ever used to signal
		     something exceptional.  */
2421		  && gimple_has_side_effects (stmt))
2422		{
2423		  predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2424		  break;
2425		}
2426	    }
2427	}
2428    }
2429  tree_predict_by_opcode (bb);
2430}
2431
2432/* Predict branch probabilities and estimate profile of the tree CFG.
2433   This function can be called from the loop optimizers to recompute
2434   the profile information.  */
2435
2436void
2437tree_estimate_probability (void)
2438{
2439  basic_block bb;
2440
2441  add_noreturn_fake_exit_edges ();
2442  connect_infinite_loops_to_exit ();
2443  /* We use loop_niter_by_eval, which requires that the loops have
2444     preheaders.  */
2445  create_preheaders (CP_SIMPLE_PREHEADERS);
2446  calculate_dominance_info (CDI_POST_DOMINATORS);
2447
2448  bb_predictions = pointer_map_create ();
2449  tree_bb_level_predictions ();
2450  record_loop_exits ();
2451
2452  if (number_of_loops (cfun) > 1)
2453    predict_loops ();
2454
2455  FOR_EACH_BB_FN (bb, cfun)
2456    tree_estimate_probability_bb (bb);
2457
2458  FOR_EACH_BB_FN (bb, cfun)
2459    combine_predictions_for_bb (bb);
2460
2461#ifdef ENABLE_CHECKING
2462  pointer_map_traverse (bb_predictions, assert_is_empty, NULL);
2463#endif
2464  pointer_map_destroy (bb_predictions);
2465  bb_predictions = NULL;
2466
2467  estimate_bb_frequencies (false);
2468  free_dominance_info (CDI_POST_DOMINATORS);
2469  remove_fake_exit_edges ();
2470}
2471
2472/* Predict branch probabilities and estimate profile of the tree CFG.
2473   This is the driver function for PASS_PROFILE.  */
2474
2475static unsigned int
2476tree_estimate_probability_driver (void)
2477{
2478  unsigned nb_loops;
2479
2480  loop_optimizer_init (LOOPS_NORMAL);
2481  if (dump_file && (dump_flags & TDF_DETAILS))
2482    flow_loops_dump (dump_file, NULL, 0);
2483
2484  mark_irreducible_loops ();
2485
2486  nb_loops = number_of_loops (cfun);
2487  if (nb_loops > 1)
2488    scev_initialize ();
2489
2490  tree_estimate_probability ();
2491
2492  if (nb_loops > 1)
2493    scev_finalize ();
2494
2495  loop_optimizer_finalize ();
2496  if (dump_file && (dump_flags & TDF_DETAILS))
2497    gimple_dump_cfg (dump_file, dump_flags);
2498  if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
2499    profile_status_for_fn (cfun) = PROFILE_GUESSED;
2500  return 0;
2501}
2502
/* Predict, using predictor PRED, the incoming edges of CUR whose sources
   are not postdominated by BB, and recurse to the blocks that CUR
   immediately postdominates.  */
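/* As a small illustration: in a CFG of the shape

       A --> B --> ...
       |
       v
       C --> BB		(BB contains, say, a noreturn call)

   the only edge entering the region postdominated by BB from a block that
   is not itself postdominated by BB is A->C, so that is the edge which
   receives the prediction.  */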
2505
2506static void
2507predict_paths_for_bb (basic_block cur, basic_block bb,
2508		      enum br_predictor pred,
2509		      enum prediction taken,
2510		      bitmap visited)
2511{
2512  edge e;
2513  edge_iterator ei;
2514  basic_block son;
2515
  /* We are looking for all edges forming an edge cut induced by the
     set of all blocks postdominated by BB.  */
2518  FOR_EACH_EDGE (e, ei, cur->preds)
2519    if (e->src->index >= NUM_FIXED_BLOCKS
2520	&& !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2521    {
2522      edge e2;
2523      edge_iterator ei2;
2524      bool found = false;
2525
      /* Ignore fake edges and EH edges; we predict them as not taken anyway.  */
2527      if (e->flags & (EDGE_EH | EDGE_FAKE))
2528	continue;
2529      gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2530
2531      /* See if there is an edge from e->src that is not abnormal
2532	 and does not lead to BB.  */
2533      FOR_EACH_EDGE (e2, ei2, e->src->succs)
2534	if (e2 != e
2535	    && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2536	    && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2537	  {
2538	    found = true;
2539	    break;
2540	  }
2541
      /* If there is a non-abnormal path leaving e->src, predict the edge
	 using the predictor.  Otherwise we need to look for paths
	 leading to e->src.

	 The latter may lead to an infinite loop in case we are predicting
	 regions that are only reachable by abnormal edges.  We simply
	 prevent visiting a given BB twice.  */
2549      if (found)
2550        predict_edge_def (e, pred, taken);
2551      else if (bitmap_set_bit (visited, e->src->index))
2552	predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2553    }
2554  for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2555       son;
2556       son = next_dom_son (CDI_POST_DOMINATORS, son))
2557    predict_paths_for_bb (son, bb, pred, taken, visited);
2558}
2559
/* Set the branch probabilities of all paths leading to BB according to
   predictor PRED and prediction TAKEN.  */
2562
2563static void
2564predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2565			  enum prediction taken)
2566{
2567  bitmap visited = BITMAP_ALLOC (NULL);
2568  predict_paths_for_bb (bb, bb, pred, taken, visited);
2569  BITMAP_FREE (visited);
2570}
2571
2572/* Like predict_paths_leading_to but take edge instead of basic block.  */
2573
2574static void
2575predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2576			       enum prediction taken)
2577{
2578  bool has_nonloop_edge = false;
2579  edge_iterator ei;
2580  edge e2;
2581
2582  basic_block bb = e->src;
2583  FOR_EACH_EDGE (e2, ei, bb->succs)
2584    if (e2->dest != e->src && e2->dest != e->dest
2585	&& !(e->flags & (EDGE_EH | EDGE_FAKE))
2586	&& !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2587      {
2588	has_nonloop_edge = true;
2589	break;
2590      }
2591  if (!has_nonloop_edge)
2592    {
2593      bitmap visited = BITMAP_ALLOC (NULL);
2594      predict_paths_for_bb (bb, bb, pred, taken, visited);
2595      BITMAP_FREE (visited);
2596    }
2597  else
2598    predict_edge_def (e, pred, taken);
2599}
2600
2601/* This is used to carry information about basic blocks.  It is
2602   attached to the AUX field of the standard CFG block.  */
2603
2604typedef struct block_info_def
2605{
2606  /* Estimated frequency of execution of basic_block.  */
2607  sreal frequency;
2608
2609  /* To keep queue of basic blocks to process.  */
2610  basic_block next;
2611
2612  /* Number of predecessors we need to visit first.  */
2613  int npredecessors;
2614} *block_info;
2615
2616/* Similar information for edges.  */
2617typedef struct edge_info_def
2618{
  /* If the edge is a loopback edge, the probability that the edge will be
     taken given that the loop header is reached.  The estimated number of
     iterations of the loop can then be computed as
     1 / (1 - back_edge_prob).  */
2622  sreal back_edge_prob;
2623  /* True if the edge is a loopback edge in the natural loop.  */
2624  unsigned int back_edge:1;
2625} *edge_info;
2626
2627#define BLOCK_INFO(B)	((block_info) (B)->aux)
2628#define EDGE_INFO(E)	((edge_info) (E)->aux)
2629
2630/* Helper function for estimate_bb_frequencies.
2631   Propagate the frequencies in blocks marked in
2632   TOVISIT, starting in HEAD.  */
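/* As a worked example (numbers chosen only for illustration): for a simple
   loop whose back edge is taken with probability 0.9, the inner propagation
   records back_edge_prob = 0.9 on the latch edge; when the enclosing region
   is processed, cyclic_probability at the loop header becomes 0.9, so the
   header frequency is divided by (1 - 0.9), i.e. it is ten times the
   frequency flowing into the loop, matching the expected ten iterations.  */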
2633
2634static void
2635propagate_freq (basic_block head, bitmap tovisit)
2636{
2637  basic_block bb;
2638  basic_block last;
2639  unsigned i;
2640  edge e;
2641  basic_block nextbb;
2642  bitmap_iterator bi;
2643
  /* For each basic block we need to visit, count the number of its
     predecessors that we need to visit first.  */
2646  EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2647    {
2648      edge_iterator ei;
2649      int count = 0;
2650
2651      bb = BASIC_BLOCK_FOR_FN (cfun, i);
2652
2653      FOR_EACH_EDGE (e, ei, bb->preds)
2654	{
2655	  bool visit = bitmap_bit_p (tovisit, e->src->index);
2656
2657	  if (visit && !(e->flags & EDGE_DFS_BACK))
2658	    count++;
2659	  else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2660	    fprintf (dump_file,
2661		     "Irreducible region hit, ignoring edge to %i->%i\n",
2662		     e->src->index, bb->index);
2663	}
2664      BLOCK_INFO (bb)->npredecessors = count;
      /* When the function never returns, we will never process the exit block.  */
2666      if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
2667	bb->count = bb->frequency = 0;
2668    }
2669
2670  memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
2671  last = head;
2672  for (bb = head; bb; bb = nextbb)
2673    {
2674      edge_iterator ei;
2675      sreal cyclic_probability, frequency;
2676
2677      memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
2678      memcpy (&frequency, &real_zero, sizeof (real_zero));
2679
2680      nextbb = BLOCK_INFO (bb)->next;
2681      BLOCK_INFO (bb)->next = NULL;
2682
2683      /* Compute frequency of basic block.  */
2684      if (bb != head)
2685	{
2686#ifdef ENABLE_CHECKING
2687	  FOR_EACH_EDGE (e, ei, bb->preds)
2688	    gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2689			|| (e->flags & EDGE_DFS_BACK));
2690#endif
2691
2692	  FOR_EACH_EDGE (e, ei, bb->preds)
2693	    if (EDGE_INFO (e)->back_edge)
2694	      {
2695		sreal_add (&cyclic_probability, &cyclic_probability,
2696			   &EDGE_INFO (e)->back_edge_prob);
2697	      }
2698	    else if (!(e->flags & EDGE_DFS_BACK))
2699	      {
2700		sreal tmp;
2701
2702		/*  frequency += (e->probability
2703				  * BLOCK_INFO (e->src)->frequency /
2704				  REG_BR_PROB_BASE);  */
2705
2706		sreal_init (&tmp, e->probability, 0);
2707		sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
2708		sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
2709		sreal_add (&frequency, &frequency, &tmp);
2710	      }
2711
2712	  if (sreal_compare (&cyclic_probability, &real_zero) == 0)
2713	    {
2714	      memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
2715		      sizeof (frequency));
2716	    }
2717	  else
2718	    {
2719	      if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
2720		{
2721		  memcpy (&cyclic_probability, &real_almost_one,
2722			  sizeof (real_almost_one));
2723		}
2724
2725	      /* BLOCK_INFO (bb)->frequency = frequency
2726					      / (1 - cyclic_probability) */
2727
2728	      sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
2729	      sreal_div (&BLOCK_INFO (bb)->frequency,
2730			 &frequency, &cyclic_probability);
2731	    }
2732	}
2733
2734      bitmap_clear_bit (tovisit, bb->index);
2735
2736      e = find_edge (bb, head);
2737      if (e)
2738	{
2739	  sreal tmp;
2740
2741	  /* EDGE_INFO (e)->back_edge_prob
2742	     = ((e->probability * BLOCK_INFO (bb)->frequency)
2743	     / REG_BR_PROB_BASE); */
2744
2745	  sreal_init (&tmp, e->probability, 0);
2746	  sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
2747	  sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2748		     &tmp, &real_inv_br_prob_base);
2749	}
2750
2751      /* Propagate to successor blocks.  */
2752      FOR_EACH_EDGE (e, ei, bb->succs)
2753	if (!(e->flags & EDGE_DFS_BACK)
2754	    && BLOCK_INFO (e->dest)->npredecessors)
2755	  {
2756	    BLOCK_INFO (e->dest)->npredecessors--;
2757	    if (!BLOCK_INFO (e->dest)->npredecessors)
2758	      {
2759		if (!nextbb)
2760		  nextbb = e->dest;
2761		else
2762		  BLOCK_INFO (last)->next = e->dest;
2763
2764		last = e->dest;
2765	      }
2766	  }
2767    }
2768}
2769
/* Estimate frequencies in loops at the same nest level.  */
2771
2772static void
2773estimate_loops_at_level (struct loop *first_loop)
2774{
2775  struct loop *loop;
2776
2777  for (loop = first_loop; loop; loop = loop->next)
2778    {
2779      edge e;
2780      basic_block *bbs;
2781      unsigned i;
2782      bitmap tovisit = BITMAP_ALLOC (NULL);
2783
2784      estimate_loops_at_level (loop->inner);
2785
      /* Find the current loop's back edge and mark it.  */
2787      e = loop_latch_edge (loop);
2788      EDGE_INFO (e)->back_edge = 1;
2789
2790      bbs = get_loop_body (loop);
2791      for (i = 0; i < loop->num_nodes; i++)
2792	bitmap_set_bit (tovisit, bbs[i]->index);
2793      free (bbs);
2794      propagate_freq (loop->header, tovisit);
2795      BITMAP_FREE (tovisit);
2796    }
2797}
2798
2799/* Propagates frequencies through structure of loops.  */
2800
2801static void
2802estimate_loops (void)
2803{
2804  bitmap tovisit = BITMAP_ALLOC (NULL);
2805  basic_block bb;
2806
2807  /* Start by estimating the frequencies in the loops.  */
2808  if (number_of_loops (cfun) > 1)
2809    estimate_loops_at_level (current_loops->tree_root->inner);
2810
2811  /* Now propagate the frequencies through all the blocks.  */
2812  FOR_ALL_BB_FN (bb, cfun)
2813    {
2814      bitmap_set_bit (tovisit, bb->index);
2815    }
2816  propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
2817  BITMAP_FREE (tovisit);
2818}
2819
2820/* Drop the profile for NODE to guessed, and update its frequency based on
2821   whether it is expected to be hot given the CALL_COUNT.  */
2822
2823static void
2824drop_profile (struct cgraph_node *node, gcov_type call_count)
2825{
2826  struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2827  /* In the case where this was called by another function with a
2828     dropped profile, call_count will be 0. Since there are no
2829     non-zero call counts to this function, we don't know for sure
2830     whether it is hot, and therefore it will be marked normal below.  */
2831  bool hot = maybe_hot_count_p (NULL, call_count);
2832
2833  if (dump_file)
2834    fprintf (dump_file,
2835             "Dropping 0 profile for %s/%i. %s based on calls.\n",
2836             node->name (), node->order,
2837             hot ? "Function is hot" : "Function is normal");
2838  /* We only expect to miss profiles for functions that are reached
2839     via non-zero call edges in cases where the function may have
2840     been linked from another module or library (COMDATs and extern
2841     templates). See the comments below for handle_missing_profiles.
2842     Also, only warn in cases where the missing counts exceed the
2843     number of training runs. In certain cases with an execv followed
2844     by a no-return call the profile for the no-return call is not
2845     dumped and there can be a mismatch.  */
2846  if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
2847      && call_count > profile_info->runs)
2848    {
2849      if (flag_profile_correction)
2850        {
2851          if (dump_file)
2852            fprintf (dump_file,
2853                     "Missing counts for called function %s/%i\n",
2854                     node->name (), node->order);
2855        }
2856      else
2857        warning (0, "Missing counts for called function %s/%i",
2858                 node->name (), node->order);
2859    }
2860
2861  profile_status_for_fn (fn)
2862      = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
2863  node->frequency
2864      = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
2865}
2866
2867/* In the case of COMDAT routines, multiple object files will contain the same
2868   function and the linker will select one for the binary. In that case
   all the other copies from the profile-instrumented binary will be missing
2870   profile counts. Look for cases where this happened, due to non-zero
2871   call counts going to 0-count functions, and drop the profile to guessed
2872   so that we can use the estimated probabilities and avoid optimizing only
2873   for size.
2874
2875   The other case where the profile may be missing is when the routine
2876   is not going to be emitted to the object file, e.g. for "extern template"
2877   class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2878   all other cases of non-zero calls to 0-count functions.  */
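/* As a worked example (assuming the default of 20 for
   --param unlikely-bb-count-fraction): with profile_info->runs == 100
   training runs, a function whose own counts are all zero but whose
   incoming call edges sum to at least 5 satisfies
   call_count * 20 >= 100, so its profile is dropped to guessed below.  */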
2879
2880void
2881handle_missing_profiles (void)
2882{
2883  struct cgraph_node *node;
2884  int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2885  vec<struct cgraph_node *> worklist;
2886  worklist.create (64);
2887
  /* See if a 0-count function has non-0-count callers.  In this case we
     lost some profile.  Drop its function profile to PROFILE_GUESSED.  */
2890  FOR_EACH_DEFINED_FUNCTION (node)
2891    {
2892      struct cgraph_edge *e;
2893      gcov_type call_count = 0;
2894      gcov_type max_tp_first_run = 0;
2895      struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2896
2897      if (node->count)
2898        continue;
2899      for (e = node->callers; e; e = e->next_caller)
2900      {
2901        call_count += e->count;
2902
2903	if (e->caller->tp_first_run > max_tp_first_run)
2904	  max_tp_first_run = e->caller->tp_first_run;
2905      }
2906
      /* If the time profile is missing, assign the maximum that comes from
	 the caller functions.  */
2909      if (!node->tp_first_run && max_tp_first_run)
2910	node->tp_first_run = max_tp_first_run + 1;
2911
2912      if (call_count
2913          && fn && fn->cfg
2914          && (call_count * unlikely_count_fraction >= profile_info->runs))
2915        {
2916          drop_profile (node, call_count);
2917          worklist.safe_push (node);
2918        }
2919    }
2920
2921  /* Propagate the profile dropping to other 0-count COMDATs that are
2922     potentially called by COMDATs we already dropped the profile on.  */
2923  while (worklist.length () > 0)
2924    {
2925      struct cgraph_edge *e;
2926
2927      node = worklist.pop ();
      for (e = node->callees; e; e = e->next_callee)
2929        {
2930          struct cgraph_node *callee = e->callee;
2931          struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
2932
2933          if (callee->count > 0)
2934            continue;
2935          if (DECL_COMDAT (callee->decl) && fn && fn->cfg
2936              && profile_status_for_fn (fn) == PROFILE_READ)
2937            {
2938              drop_profile (node, 0);
2939              worklist.safe_push (callee);
2940            }
2941        }
2942    }
2943  worklist.release ();
2944}
2945
2946/* Convert counts measured by profile driven feedback to frequencies.
2947   Return nonzero iff there was any nonzero execution count.  */
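/* For example (illustrative numbers): if the hottest block executed
   count_max = 1000 times and some block executed 250 times, that block is
   assigned frequency (250 * BB_FREQ_MAX + 500) / 1000, i.e. roughly a
   quarter of BB_FREQ_MAX.  */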
2948
2949int
2950counts_to_freqs (void)
2951{
2952  gcov_type count_max, true_count_max = 0;
2953  basic_block bb;
2954
  /* Don't overwrite the estimated frequencies when the profile for
     the function is missing.  We may drop this function to PROFILE_GUESSED
     later in drop_profile ().  */
2958  if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
2959    return 0;
2960
2961  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2962    true_count_max = MAX (bb->count, true_count_max);
2963
2964  count_max = MAX (true_count_max, 1);
2965  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2966    bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
2967
2968  return true_count_max;
2969}
2970
/* Return true if the function is likely to be expensive, so there is no
   point in optimizing the performance of its prologue, epilogue or in
   doing inlining at the expense of code size growth.  THRESHOLD is the
   limit on the number of instructions the function can execute on average
   and still be considered not expensive.  */
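/* As a rough worked example: with a THRESHOLD of 100 and an entry block
   frequency of F, the function is reported expensive as soon as the sum of
   the frequencies of its active insns exceeds 100 * F, i.e. as soon as it
   is expected to execute more than about 100 insns per invocation.  */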
2975
2976bool
2977expensive_function_p (int threshold)
2978{
2979  unsigned int sum = 0;
2980  basic_block bb;
2981  unsigned int limit;
2982
  /* We cannot compute this accurately for large thresholds due to the
     scaled frequencies.  */
2985  gcc_assert (threshold <= BB_FREQ_MAX);
2986
  /* Frequencies are out of range.  This either means that the function
     contains an internal loop executing more than BB_FREQ_MAX times or
     that profile feedback is available but the function has not been
     executed at all.  */
2990  if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
2991    return true;
2992
  /* At most BB_FREQ_MAX^2, so overflow won't happen.  */
2994  limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
2995  FOR_EACH_BB_FN (bb, cfun)
2996    {
2997      rtx insn;
2998
2999      FOR_BB_INSNS (bb, insn)
3000	if (active_insn_p (insn))
3001	  {
3002	    sum += bb->frequency;
3003	    if (sum > limit)
3004	      return true;
	  }
3006    }
3007
3008  return false;
3009}
3010
3011/* Estimate and propagate basic block frequencies using the given branch
3012   probabilities.  If FORCE is true, the frequencies are used to estimate
3013   the counts even when there are already non-zero profile counts.  */
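/* The estimation below first computes each block's frequency relative to
   the entry block (taken as 1.0) and then rescales so that the most
   frequent block gets BB_FREQ_MAX; e.g. a block with relative frequency
   2.5 in a function whose hottest block has relative frequency 10 ends up
   with frequency BB_FREQ_MAX / 4 (after rounding).  */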
3014
3015void
3016estimate_bb_frequencies (bool force)
3017{
3018  basic_block bb;
3019  sreal freq_max;
3020
3021  if (force || profile_status_for_fn (cfun) != PROFILE_READ || !counts_to_freqs ())
3022    {
3023      static int real_values_initialized = 0;
3024
3025      if (!real_values_initialized)
3026        {
3027	  real_values_initialized = 1;
3028	  sreal_init (&real_zero, 0, 0);
3029	  sreal_init (&real_one, 1, 0);
3030	  sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
3031	  sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
3032	  sreal_init (&real_one_half, 1, -1);
3033	  sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
3034	  sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
3035	}
3036
3037      mark_dfs_back_edges ();
3038
3039      single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
3040	 REG_BR_PROB_BASE;
3041
3042      /* Set up block info for each basic block.  */
3043      alloc_aux_for_blocks (sizeof (struct block_info_def));
3044      alloc_aux_for_edges (sizeof (struct edge_info_def));
3045      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3046	{
3047	  edge e;
3048	  edge_iterator ei;
3049
3050	  FOR_EACH_EDGE (e, ei, bb->succs)
3051	    {
3052	      sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
3053	      sreal_mul (&EDGE_INFO (e)->back_edge_prob,
3054			 &EDGE_INFO (e)->back_edge_prob,
3055			 &real_inv_br_prob_base);
3056	    }
3057	}
3058
3059      /* First compute frequencies locally for each loop from innermost
3060         to outermost to examine frequencies for back edges.  */
3061      estimate_loops ();
3062
3063      memcpy (&freq_max, &real_zero, sizeof (real_zero));
3064      FOR_EACH_BB_FN (bb, cfun)
3065	if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
3066	  memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));
3067
3068      sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
3069      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3070	{
3071	  sreal tmp;
3072
3073	  sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
3074	  sreal_add (&tmp, &tmp, &real_one_half);
3075	  bb->frequency = sreal_to_int (&tmp);
3076	}
3077
3078      free_aux_for_blocks ();
3079      free_aux_for_edges ();
3080    }
3081  compute_function_frequency ();
3082}
3083
3084/* Decide whether function is hot, cold or unlikely executed.  */
3085void
3086compute_function_frequency (void)
3087{
3088  basic_block bb;
3089  struct cgraph_node *node = cgraph_get_node (current_function_decl);
3090
3091  if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
3092      || MAIN_NAME_P (DECL_NAME (current_function_decl)))
3093    node->only_called_at_startup = true;
3094  if (DECL_STATIC_DESTRUCTOR (current_function_decl))
3095    node->only_called_at_exit = true;
3096
3097  if (profile_status_for_fn (cfun) != PROFILE_READ)
3098    {
3099      int flags = flags_from_decl_or_type (current_function_decl);
3100      if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
3101	  != NULL)
3102        node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3103      else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
3104	       != NULL)
3105        node->frequency = NODE_FREQUENCY_HOT;
3106      else if (flags & ECF_NORETURN)
3107        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3108      else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
3109        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3110      else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
3111	       || DECL_STATIC_DESTRUCTOR (current_function_decl))
3112        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3113      return;
3114    }
3115
  /* Only the first time do we try to drop the function into the unlikely
     executed category; after inlining, roundoff errors may confuse us.
     The ipa-profile pass will drop functions called only from unlikely
     functions to unlikely, and that is most of what we care about.  */
3120  if (!cfun->after_inlining)
3121    node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3122  FOR_EACH_BB_FN (bb, cfun)
3123    {
3124      if (maybe_hot_bb_p (cfun, bb))
3125	{
3126	  node->frequency = NODE_FREQUENCY_HOT;
3127	  return;
3128	}
3129      if (!probably_never_executed_bb_p (cfun, bb))
3130	node->frequency = NODE_FREQUENCY_NORMAL;
3131    }
3132}
3133
3134static bool
3135gate_estimate_probability (void)
3136{
3137  return flag_guess_branch_prob;
3138}
3139
3140/* Build PREDICT_EXPR.  */
3141tree
3142build_predict_expr (enum br_predictor predictor, enum prediction taken)
3143{
3144  tree t = build1 (PREDICT_EXPR, void_type_node,
3145		   build_int_cst (integer_type_node, predictor));
3146  SET_PREDICT_EXPR_OUTCOME (t, taken);
3147  return t;
3148}
3149
3150const char *
3151predictor_name (enum br_predictor predictor)
3152{
3153  return predictor_info[predictor].name;
3154}
3155
3156namespace {
3157
3158const pass_data pass_data_profile =
3159{
3160  GIMPLE_PASS, /* type */
3161  "profile_estimate", /* name */
3162  OPTGROUP_NONE, /* optinfo_flags */
3163  true, /* has_gate */
3164  true, /* has_execute */
3165  TV_BRANCH_PROB, /* tv_id */
3166  PROP_cfg, /* properties_required */
3167  0, /* properties_provided */
3168  0, /* properties_destroyed */
3169  0, /* todo_flags_start */
3170  TODO_verify_ssa, /* todo_flags_finish */
3171};
3172
3173class pass_profile : public gimple_opt_pass
3174{
3175public:
3176  pass_profile (gcc::context *ctxt)
3177    : gimple_opt_pass (pass_data_profile, ctxt)
3178  {}
3179
3180  /* opt_pass methods: */
3181  bool gate () { return gate_estimate_probability (); }
3182  unsigned int execute () { return tree_estimate_probability_driver (); }
3183
3184}; // class pass_profile
3185
3186} // anon namespace
3187
3188gimple_opt_pass *
3189make_pass_profile (gcc::context *ctxt)
3190{
3191  return new pass_profile (ctxt);
3192}
3193
3194namespace {
3195
3196const pass_data pass_data_strip_predict_hints =
3197{
3198  GIMPLE_PASS, /* type */
3199  "*strip_predict_hints", /* name */
3200  OPTGROUP_NONE, /* optinfo_flags */
3201  false, /* has_gate */
3202  true, /* has_execute */
3203  TV_BRANCH_PROB, /* tv_id */
3204  PROP_cfg, /* properties_required */
3205  0, /* properties_provided */
3206  0, /* properties_destroyed */
3207  0, /* todo_flags_start */
3208  TODO_verify_ssa, /* todo_flags_finish */
3209};
3210
3211class pass_strip_predict_hints : public gimple_opt_pass
3212{
3213public:
3214  pass_strip_predict_hints (gcc::context *ctxt)
3215    : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3216  {}
3217
3218  /* opt_pass methods: */
3219  opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3220  unsigned int execute () { return strip_predict_hints (); }
3221
3222}; // class pass_strip_predict_hints
3223
3224} // anon namespace
3225
3226gimple_opt_pass *
3227make_pass_strip_predict_hints (gcc::context *ctxt)
3228{
3229  return new pass_strip_predict_hints (ctxt);
3230}
3231
/* Rebuild function frequencies.  Passes are in general expected to
   maintain the profile by hand; however, in some cases this is not
   possible: for example, when inlining several functions with loops,
   frequencies might run out of scale and thus need to be recomputed.  */
3236
3237void
3238rebuild_frequencies (void)
3239{
3240  timevar_push (TV_REBUILD_FREQUENCIES);
3241
3242  /* When the max bb count in the function is small, there is a higher
3243     chance that there were truncation errors in the integer scaling
3244     of counts by inlining and other optimizations. This could lead
3245     to incorrect classification of code as being cold when it isn't.
3246     In that case, force the estimation of bb counts/frequencies from the
3247     branch probabilities, rather than computing frequencies from counts,
3248     which may also lead to frequencies incorrectly reduced to 0. There
3249     is less precision in the probabilities, so we only do this for small
3250     max counts.  */
3251  gcov_type count_max = 0;
3252  basic_block bb;
3253  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3254    count_max = MAX (bb->count, count_max);
3255
3256  if (profile_status_for_fn (cfun) == PROFILE_GUESSED
3257      || (profile_status_for_fn (cfun) == PROFILE_READ && count_max < REG_BR_PROB_BASE/10))
3258    {
3259      loop_optimizer_init (0);
3260      add_noreturn_fake_exit_edges ();
3261      mark_irreducible_loops ();
3262      connect_infinite_loops_to_exit ();
3263      estimate_bb_frequencies (true);
3264      remove_fake_exit_edges ();
3265      loop_optimizer_finalize ();
3266    }
3267  else if (profile_status_for_fn (cfun) == PROFILE_READ)
3268    counts_to_freqs ();
3269  else
3270    gcc_unreachable ();
3271  timevar_pop (TV_REBUILD_FREQUENCIES);
3272}
3273