1/* Branch prediction routines for the GNU compiler.
2   Copyright (C) 2000-2014 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8Software Foundation; either version 3, or (at your option) any later
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3.  If not see
18<http://www.gnu.org/licenses/>.  */
19
20/* References:
21
22   [1] "Branch Prediction for Free"
23       Ball and Larus; PLDI '93.
24   [2] "Static Branch Frequency and Program Profile Analysis"
25       Wu and Larus; MICRO-27.
26   [3] "Corpus-based Static Branch Prediction"
27       Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
28
29
30#include "config.h"
31#include "system.h"
32#include "coretypes.h"
33#include "tm.h"
34#include "tree.h"
35#include "calls.h"
36#include "rtl.h"
37#include "tm_p.h"
38#include "hard-reg-set.h"
39#include "basic-block.h"
40#include "insn-config.h"
41#include "regs.h"
42#include "flags.h"
43#include "function.h"
44#include "except.h"
45#include "diagnostic-core.h"
46#include "recog.h"
47#include "expr.h"
48#include "predict.h"
49#include "coverage.h"
50#include "sreal.h"
51#include "params.h"
52#include "target.h"
53#include "cfgloop.h"
54#include "hash-map.h"
55#include "tree-ssa-alias.h"
56#include "internal-fn.h"
57#include "gimple-expr.h"
58#include "is-a.h"
59#include "gimple.h"
60#include "gimple-iterator.h"
61#include "gimple-ssa.h"
62#include "cgraph.h"
63#include "tree-cfg.h"
64#include "tree-phinodes.h"
65#include "ssa-iterators.h"
66#include "tree-ssa-loop-niter.h"
67#include "tree-ssa-loop.h"
68#include "tree-pass.h"
69#include "tree-scalar-evolution.h"
70#include "cfgloop.h"
71
72/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
73		   1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
74static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
75	     real_inv_br_prob_base, real_one_half, real_bb_freq_max;
76
77static void combine_predictions_for_insn (rtx_insn *, basic_block);
78static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
79static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
80static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
81static bool can_predict_insn_p (const rtx_insn *);
82
83/* Information we hold about each branch predictor.
84   Filled using information from predict.def.  */
85
86struct predictor_info
87{
88  const char *const name;	/* Name used in the debugging dumps.  */
89  const int hitrate;		/* Expected hitrate used by
90				   predict_insn_def call.  */
91  const int flags;
92};
93
/* Use the given predictor without Dempster-Shafer theory if it matches
   using the first_match heuristic.  */
96#define PRED_FLAG_FIRST_MATCH 1
97
/* Convert a hitrate given in percent to our internal representation.  */
99
100#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
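
/* For illustration, assuming the usual REG_BR_PROB_BASE of 10000:
   HITRATE (50) is 5000 and HITRATE (99) is 9900; the "+ 50" rounds to the
   nearest internal unit instead of truncating.  */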
101
102#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
103static const struct predictor_info predictor_info[]= {
104#include "predict.def"
105
106  /* Upper bound on predictors.  */
107  {NULL, 0, 0}
108};
109#undef DEF_PREDICTOR
110
111/* Return TRUE if frequency FREQ is considered to be hot.  */
112
113static inline bool
114maybe_hot_frequency_p (struct function *fun, int freq)
115{
116  struct cgraph_node *node = cgraph_node::get (fun->decl);
117  if (!profile_info || !flag_branch_probabilities)
118    {
119      if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
120        return false;
121      if (node->frequency == NODE_FREQUENCY_HOT)
122        return true;
123    }
124  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
125    return true;
126  if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
127      && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
128    return false;
129  if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
130    return false;
131  if (freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency
132	      / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
133    return false;
134  return true;
135}
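
/* For illustration, assuming the default --param hot-bb-frequency-fraction
   of 1000: a basic block is considered maybe-hot when its estimated
   frequency is at least 1/1000 of the entry block's frequency (and the
   function itself is not marked as unlikely executed).  */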
136
137static gcov_type min_count = -1;
138
139/* Determine the threshold for hot BB counts.  */
140
141gcov_type
142get_hot_bb_threshold ()
143{
144  gcov_working_set_t *ws;
145  if (min_count == -1)
146    {
147      ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
148      gcc_assert (ws);
149      min_count = ws->min_counter;
150    }
151  return min_count;
152}
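
/* For illustration, assuming the default --param hot-bb-count-ws-permille
   of 999: the threshold above is the smallest counter among the blocks that
   together account for 99.9% of all profiled block executions, so roughly
   the hottest 99.9% of the dynamic profile is treated as hot.  */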
153
154/* Set the threshold for hot BB counts.  */
155
156void
157set_hot_bb_threshold (gcov_type min)
158{
159  min_count = min;
160}
161
/* Return TRUE if profile count COUNT is considered to be hot.  */
163
164static inline bool
165maybe_hot_count_p (struct function *fun, gcov_type count)
166{
167  if (fun && profile_status_for_fn (fun) != PROFILE_READ)
168    return true;
169  /* Code executed at most once is not hot.  */
170  if (profile_info->runs >= count)
171    return false;
172  return (count >= get_hot_bb_threshold ());
173}
174
175/* Return true in case BB can be CPU intensive and should be optimized
176   for maximal performance.  */
177
178bool
179maybe_hot_bb_p (struct function *fun, const_basic_block bb)
180{
181  gcc_checking_assert (fun);
182  if (profile_status_for_fn (fun) == PROFILE_READ)
183    return maybe_hot_count_p (fun, bb->count);
184  return maybe_hot_frequency_p (fun, bb->frequency);
185}
186
187/* Return true if the call can be hot.  */
188
189bool
190cgraph_edge::maybe_hot_p (void)
191{
192  if (profile_info && flag_branch_probabilities
193      && !maybe_hot_count_p (NULL, count))
194    return false;
195  if (caller->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED
196      || (callee
197	  && callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
198    return false;
199  if (caller->frequency > NODE_FREQUENCY_UNLIKELY_EXECUTED
200      && (callee
201	  && callee->frequency <= NODE_FREQUENCY_EXECUTED_ONCE))
202    return false;
203  if (optimize_size)
204    return false;
205  if (caller->frequency == NODE_FREQUENCY_HOT)
206    return true;
207  if (caller->frequency == NODE_FREQUENCY_EXECUTED_ONCE
208      && frequency < CGRAPH_FREQ_BASE * 3 / 2)
209    return false;
210  if (flag_guess_branch_prob)
211    {
212      if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0
213	  || frequency <= (CGRAPH_FREQ_BASE
214				 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
215        return false;
216    }
217  return true;
218}
219
/* Return true in case edge E can be CPU intensive and should be optimized
   for maximal performance.  */
222
223bool
224maybe_hot_edge_p (edge e)
225{
226  if (profile_status_for_fn (cfun) == PROFILE_READ)
227    return maybe_hot_count_p (cfun, e->count);
228  return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
229}
230
231
232
/* Return true if profile COUNT and FREQUENCY, or function FUN's static
   node frequency, reflect that the code is never executed.  */
235
236static bool
237probably_never_executed (struct function *fun,
238                         gcov_type count, int frequency)
239{
240  gcc_checking_assert (fun);
241  if (profile_status_for_fn (cfun) == PROFILE_READ)
242    {
243      int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
244      if (count * unlikely_count_fraction >= profile_info->runs)
245	return false;
246      if (!frequency)
247	return true;
248      if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency)
249	return false;
250      if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
251	{
252          gcov_type computed_count;
          /* Check for the possibility of overflow; in that case the entry bb
             count is large enough to do the division first without losing
             much precision.  */
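          /* With REG_BR_PROB_BASE of 10000 this cut-off is an entry count of
             10^8; below it we can multiply first exactly, above it dividing
             first loses little precision since the count is already large.  */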
256	  if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count < REG_BR_PROB_BASE *
257	      REG_BR_PROB_BASE)
258            {
259              gcov_type scaled_count
260		  = frequency * ENTRY_BLOCK_PTR_FOR_FN (cfun)->count *
261	     unlikely_count_fraction;
262	      computed_count = RDIV (scaled_count,
263				     ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency);
264            }
265          else
266            {
267	      computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count,
268				     ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency);
269              computed_count *= frequency * unlikely_count_fraction;
270            }
271          if (computed_count >= profile_info->runs)
272            return false;
273	}
274      return true;
275    }
276  if ((!profile_info || !flag_branch_probabilities)
277      && (cgraph_node::get (fun->decl)->frequency
278	  == NODE_FREQUENCY_UNLIKELY_EXECUTED))
279    return true;
280  return false;
281}
282
283
284/* Return true in case BB is probably never executed.  */
285
286bool
287probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
288{
289  return probably_never_executed (fun, bb->count, bb->frequency);
290}
291
292
293/* Return true in case edge E is probably never executed.  */
294
295bool
296probably_never_executed_edge_p (struct function *fun, edge e)
297{
298  return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
299}
300
301/* Return true if function should be optimized for size.  */
302
303bool
304cgraph_node::optimize_for_size_p (void)
305{
306  if (optimize_size)
307    return true;
308  if (frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
309    return true;
310  else
311    return false;
312}
313
/* Return true when function FUN should always be optimized for size.  */
315
316bool
317optimize_function_for_size_p (struct function *fun)
318{
319  if (optimize_size)
320    return true;
321  if (!fun || !fun->decl)
322    return false;
323
324  cgraph_node *n = cgraph_node::get (fun->decl);
325  return n && n->optimize_for_size_p ();
326}
327
/* Return true when function FUN should always be optimized for speed.  */
329
330bool
331optimize_function_for_speed_p (struct function *fun)
332{
333  return !optimize_function_for_size_p (fun);
334}
335
336/* Return TRUE when BB should be optimized for size.  */
337
338bool
339optimize_bb_for_size_p (const_basic_block bb)
340{
341  return (optimize_function_for_size_p (cfun)
342	  || (bb && !maybe_hot_bb_p (cfun, bb)));
343}
344
345/* Return TRUE when BB should be optimized for speed.  */
346
347bool
348optimize_bb_for_speed_p (const_basic_block bb)
349{
350  return !optimize_bb_for_size_p (bb);
351}
352
/* Return TRUE when edge E should be optimized for size.  */
354
355bool
356optimize_edge_for_size_p (edge e)
357{
358  return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
359}
360
/* Return TRUE when edge E should be optimized for speed.  */
362
363bool
364optimize_edge_for_speed_p (edge e)
365{
366  return !optimize_edge_for_size_p (e);
367}
368
/* Return TRUE when the current instruction should be optimized for size.  */
370
371bool
372optimize_insn_for_size_p (void)
373{
374  return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
375}
376
/* Return TRUE when the current instruction should be optimized for speed.  */
378
379bool
380optimize_insn_for_speed_p (void)
381{
382  return !optimize_insn_for_size_p ();
383}
384
385/* Return TRUE when LOOP should be optimized for size.  */
386
387bool
388optimize_loop_for_size_p (struct loop *loop)
389{
390  return optimize_bb_for_size_p (loop->header);
391}
392
393/* Return TRUE when LOOP should be optimized for speed.  */
394
395bool
396optimize_loop_for_speed_p (struct loop *loop)
397{
398  return optimize_bb_for_speed_p (loop->header);
399}
400
401/* Return TRUE when LOOP nest should be optimized for speed.  */
402
403bool
404optimize_loop_nest_for_speed_p (struct loop *loop)
405{
406  struct loop *l = loop;
407  if (optimize_loop_for_speed_p (loop))
408    return true;
409  l = loop->inner;
410  while (l && l != loop)
411    {
412      if (optimize_loop_for_speed_p (l))
413        return true;
414      if (l->inner)
415        l = l->inner;
416      else if (l->next)
417        l = l->next;
418      else
419        {
420	  while (l != loop && !l->next)
421	    l = loop_outer (l);
422	  if (l != loop)
423	    l = l->next;
424	}
425    }
426  return false;
427}
428
429/* Return TRUE when LOOP nest should be optimized for size.  */
430
431bool
432optimize_loop_nest_for_size_p (struct loop *loop)
433{
434  return !optimize_loop_nest_for_speed_p (loop);
435}
436
/* Return true when edge E is likely to be well predicted by the CPU's
   branch predictor.  */
439
440bool
441predictable_edge_p (edge e)
442{
443  if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
444    return false;
445  if ((e->probability
446       <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
447      || (REG_BR_PROB_BASE - e->probability
448          <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
449    return true;
450  return false;
451}
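
/* For illustration, assuming the default --param predictable-branch-outcome
   of 2: an edge is considered predictable when its probability is at most
   2% or at least 98%, i.e. the branch goes the same way nearly always.  */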
452
453
/* Set the RTL expansion profile according to basic block BB.  */
455
456void
457rtl_profile_for_bb (basic_block bb)
458{
459  crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
460}
461
/* Set the RTL expansion profile according to edge E.  */
463
464void
465rtl_profile_for_edge (edge e)
466{
467  crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
468}
469
470/* Set RTL expansion to default mode (i.e. when profile info is not known).  */
471void
472default_rtl_profile (void)
473{
474  crtl->maybe_hot_insn_p = true;
475}
476
/* Return true if one of BB's outgoing edges is already predicted by
   PREDICTOR.  */
479
480bool
481rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
482{
483  rtx note;
484  if (!INSN_P (BB_END (bb)))
485    return false;
486  for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
487    if (REG_NOTE_KIND (note) == REG_BR_PRED
488	&& INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
489      return true;
490  return false;
491}
492
/* Structure representing predictions at the tree level.  */
494
495struct edge_prediction {
496    struct edge_prediction *ep_next;
497    edge ep_edge;
498    enum br_predictor ep_predictor;
499    int ep_probability;
500};
501
/* This map contains, for each basic block, the list of predictions for its
   outgoing edges.  */
504
505static hash_map<const_basic_block, edge_prediction *> *bb_predictions;
506
/* Return true if one of BB's outgoing edges is already predicted by
   PREDICTOR.  */
509
510bool
511gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
512{
513  struct edge_prediction *i;
514  edge_prediction **preds = bb_predictions->get (bb);
515
516  if (!preds)
517    return false;
518
519  for (i = *preds; i; i = i->ep_next)
520    if (i->ep_predictor == predictor)
521      return true;
522  return false;
523}
524
/* Return true when the probability of edge is reliable.

   The profile guessing code is good at predicting branch outcome (i.e.
   taken/not taken), which it gets right slightly over 75% of the time.
   It is however notoriously poor at predicting the probability itself.
   In general the guessed profile appears a lot flatter (with probabilities
   closer to 50%) than reality, so it is a bad idea to use it to drive
   optimizations such as those disabling dynamic branch prediction for well
   predictable branches.

   There are two exceptions: edges leading to noreturn calls and edges
   predicted by the number-of-iterations heuristics are predicted well.  This
   predicate should be able to distinguish those, but at the moment it simply
   checks for the noreturn heuristic, which is the only one giving a
   probability over 99% or below 1%.  In the future we might want to propagate
   reliability information across the CFG if we find it useful in multiple
   places.  */
541static bool
542probability_reliable_p (int prob)
543{
544  return (profile_status_for_fn (cfun) == PROFILE_READ
545	  || (profile_status_for_fn (cfun) == PROFILE_GUESSED
546	      && (prob <= HITRATE (1) || prob >= HITRATE (99))));
547}
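
/* For illustration, with REG_BR_PROB_BASE of 10000 the bounds above are
   HITRATE (1) == 100 and HITRATE (99) == 9900, so a guessed profile is
   trusted only when the branch is predicted taken in at most 1% or at
   least 99% of the cases.  */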
548
549/* Same predicate as above, working on edges.  */
550bool
551edge_probability_reliable_p (const_edge e)
552{
553  return probability_reliable_p (e->probability);
554}
555
556/* Same predicate as edge_probability_reliable_p, working on notes.  */
557bool
558br_prob_note_reliable_p (const_rtx note)
559{
560  gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
561  return probability_reliable_p (XINT (note, 0));
562}
563
564static void
565predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
566{
567  gcc_assert (any_condjump_p (insn));
568  if (!flag_guess_branch_prob)
569    return;
570
571  add_reg_note (insn, REG_BR_PRED,
572		gen_rtx_CONCAT (VOIDmode,
573				GEN_INT ((int) predictor),
574				GEN_INT ((int) probability)));
575}
576
/* Predict insn INSN using the given PREDICTOR.  */
578
579void
580predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
581		  enum prediction taken)
582{
583   int probability = predictor_info[(int) predictor].hitrate;
584
585   if (taken != TAKEN)
586     probability = REG_BR_PROB_BASE - probability;
587
588   predict_insn (insn, predictor, probability);
589}
590
591/* Predict edge E with given probability if possible.  */
592
593void
594rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
595{
596  rtx_insn *last_insn;
597  last_insn = BB_END (e->src);
598
599  /* We can store the branch prediction information only about
600     conditional jumps.  */
601  if (!any_condjump_p (last_insn))
602    return;
603
  /* We always store the probability of the branch being taken.  */
605  if (e->flags & EDGE_FALLTHRU)
606    probability = REG_BR_PROB_BASE - probability;
607
608  predict_insn (last_insn, predictor, probability);
609}
610
611/* Predict edge E with the given PROBABILITY.  */
612void
613gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
614{
615  gcc_assert (profile_status_for_fn (cfun) != PROFILE_GUESSED);
  if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
      && EDGE_COUNT (e->src->succs) > 1
      && flag_guess_branch_prob && optimize)
619    {
620      struct edge_prediction *i = XNEW (struct edge_prediction);
621      edge_prediction *&preds = bb_predictions->get_or_insert (e->src);
622
623      i->ep_next = preds;
624      preds = i;
625      i->ep_probability = probability;
626      i->ep_predictor = predictor;
627      i->ep_edge = e;
628    }
629}
630
/* Remove all predictions on E's source basic block that are attached
   to edge E.  */
633void
634remove_predictions_associated_with_edge (edge e)
635{
636  if (!bb_predictions)
637    return;
638
639  edge_prediction **preds = bb_predictions->get (e->src);
640
641  if (preds)
642    {
643      struct edge_prediction **prediction = preds;
644      struct edge_prediction *next;
645
646      while (*prediction)
647	{
648	  if ((*prediction)->ep_edge == e)
649	    {
650	      next = (*prediction)->ep_next;
651	      free (*prediction);
652	      *prediction = next;
653	    }
654	  else
655	    prediction = &((*prediction)->ep_next);
656	}
657    }
658}
659
660/* Clears the list of predictions stored for BB.  */
661
662static void
663clear_bb_predictions (basic_block bb)
664{
665  edge_prediction **preds = bb_predictions->get (bb);
666  struct edge_prediction *pred, *next;
667
668  if (!preds)
669    return;
670
671  for (pred = *preds; pred; pred = next)
672    {
673      next = pred->ep_next;
674      free (pred);
675    }
676  *preds = NULL;
677}
678
/* Return true when we can store a prediction on insn INSN.
   At the moment we represent predictions only on conditional
   jumps, not on computed jumps or other complicated cases.  */
682static bool
683can_predict_insn_p (const rtx_insn *insn)
684{
685  return (JUMP_P (insn)
686	  && any_condjump_p (insn)
687	  && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
688}
689
/* Predict edge E using the given predictor if possible.  */
691
692void
693predict_edge_def (edge e, enum br_predictor predictor,
694		  enum prediction taken)
695{
696   int probability = predictor_info[(int) predictor].hitrate;
697
698   if (taken != TAKEN)
699     probability = REG_BR_PROB_BASE - probability;
700
701   predict_edge (e, predictor, probability);
702}
703
704/* Invert all branch predictions or probability notes in the INSN.  This needs
705   to be done each time we invert the condition used by the jump.  */
706
707void
708invert_br_probabilities (rtx insn)
709{
710  rtx note;
711
712  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
713    if (REG_NOTE_KIND (note) == REG_BR_PROB)
714      XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
715    else if (REG_NOTE_KIND (note) == REG_BR_PRED)
716      XEXP (XEXP (note, 0), 1)
717	= GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
718}
719
720/* Dump information about the branch prediction to the output file.  */
721
722static void
723dump_prediction (FILE *file, enum br_predictor predictor, int probability,
724		 basic_block bb, int used)
725{
726  edge e;
727  edge_iterator ei;
728
729  if (!file)
730    return;
731
732  FOR_EACH_EDGE (e, ei, bb->succs)
733    if (! (e->flags & EDGE_FALLTHRU))
734      break;
735
736  fprintf (file, "  %s heuristics%s: %.1f%%",
737	   predictor_info[predictor].name,
738	   used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
739
740  if (bb->count)
741    {
      fprintf (file, "  exec %" PRId64, bb->count);
      if (e)
	{
	  fprintf (file, " hit %" PRId64, e->count);
746	  fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
747	}
748    }
749
750  fprintf (file, "\n");
751}
752
/* We cannot predict the probabilities of the outgoing edges of BB.  Set them
   evenly and hope for the best.  */
755static void
756set_even_probabilities (basic_block bb)
757{
758  int nedges = 0;
759  edge e;
760  edge_iterator ei;
761
762  FOR_EACH_EDGE (e, ei, bb->succs)
763    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
764      nedges ++;
765  FOR_EACH_EDGE (e, ei, bb->succs)
766    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
767      e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
768    else
769      e->probability = 0;
770}
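
/* For illustration: a block with two viable successors gets 50%/50%
   ((10000 + 1) / 2 == 5000 each with REG_BR_PROB_BASE of 10000), while one
   with three viable successors gets roughly 33% on each edge
   ((10000 + 1) / 3 == 3333).  */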
771
/* Combine all REG_BR_PRED notes into a single probability and attach a
   REG_BR_PROB note if one is not already present.  Remove the now-useless
   REG_BR_PRED notes.  */
774
775static void
776combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
777{
778  rtx prob_note;
779  rtx *pnote;
780  rtx note;
781  int best_probability = PROB_EVEN;
782  enum br_predictor best_predictor = END_PREDICTORS;
783  int combined_probability = REG_BR_PROB_BASE / 2;
784  int d;
785  bool first_match = false;
786  bool found = false;
787
788  if (!can_predict_insn_p (insn))
789    {
790      set_even_probabilities (bb);
791      return;
792    }
793
794  prob_note = find_reg_note (insn, REG_BR_PROB, 0);
795  pnote = &REG_NOTES (insn);
796  if (dump_file)
797    fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
798	     bb->index);
799
  /* We implement the "first match" heuristic and use the probability guessed
     by the predictor with the smallest index.  */
802  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
803    if (REG_NOTE_KIND (note) == REG_BR_PRED)
804      {
805	enum br_predictor predictor = ((enum br_predictor)
806				       INTVAL (XEXP (XEXP (note, 0), 0)));
807	int probability = INTVAL (XEXP (XEXP (note, 0), 1));
808
809	found = true;
810	if (best_predictor > predictor)
811	  best_probability = probability, best_predictor = predictor;
812
813	d = (combined_probability * probability
814	     + (REG_BR_PROB_BASE - combined_probability)
815	     * (REG_BR_PROB_BASE - probability));
816
817	/* Use FP math to avoid overflows of 32bit integers.  */
818	if (d == 0)
819	  /* If one probability is 0% and one 100%, avoid division by zero.  */
820	  combined_probability = REG_BR_PROB_BASE / 2;
821	else
822	  combined_probability = (((double) combined_probability) * probability
823				  * REG_BR_PROB_BASE / d + 0.5);
824      }
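
  /* The loop above combines predictions using the Dempster-Shafer rule:
     given the running estimate P and a new prediction Q (both as fractions
     of REG_BR_PROB_BASE), the new estimate is P*Q / (P*Q + (1-P)*(1-Q)).
     For illustration, two independent 75% predictions combine to
     0.75*0.75 / (0.75*0.75 + 0.25*0.25) = 90%.  */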
825
  /* Decide which heuristic to use.  If we didn't match anything, use the
     no_prediction heuristic; if we did, use either first match or
     Dempster-Shafer theory depending on the flags.  */
829
830  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
831    first_match = true;
832
833  if (!found)
834    dump_prediction (dump_file, PRED_NO_PREDICTION,
835		     combined_probability, bb, true);
836  else
837    {
838      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
839		       bb, !first_match);
840      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
841		       bb, first_match);
842    }
843
844  if (first_match)
845    combined_probability = best_probability;
846  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
847
848  while (*pnote)
849    {
850      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
851	{
852	  enum br_predictor predictor = ((enum br_predictor)
853					 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
854	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
855
856	  dump_prediction (dump_file, predictor, probability, bb,
857			   !first_match || best_predictor == predictor);
858	  *pnote = XEXP (*pnote, 1);
859	}
860      else
861	pnote = &XEXP (*pnote, 1);
862    }
863
864  if (!prob_note)
865    {
866      add_int_reg_note (insn, REG_BR_PROB, combined_probability);
867
      /* Save the prediction into the CFG if this is a non-degenerate
	 conditional jump.  */
870      if (!single_succ_p (bb))
871	{
872	  BRANCH_EDGE (bb)->probability = combined_probability;
873	  FALLTHRU_EDGE (bb)->probability
874	    = REG_BR_PROB_BASE - combined_probability;
875	}
876    }
877  else if (!single_succ_p (bb))
878    {
879      int prob = XINT (prob_note, 0);
880
881      BRANCH_EDGE (bb)->probability = prob;
882      FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
883    }
884  else
885    single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
886}
887
/* Combine the predictions into a single probability and store it into the
   CFG.  Remove the now-useless prediction entries.  */
890
891static void
892combine_predictions_for_bb (basic_block bb)
893{
894  int best_probability = PROB_EVEN;
895  enum br_predictor best_predictor = END_PREDICTORS;
896  int combined_probability = REG_BR_PROB_BASE / 2;
897  int d;
898  bool first_match = false;
899  bool found = false;
900  struct edge_prediction *pred;
901  int nedges = 0;
902  edge e, first = NULL, second = NULL;
903  edge_iterator ei;
904
905  FOR_EACH_EDGE (e, ei, bb->succs)
906    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
907      {
908	nedges ++;
909	if (first && !second)
910	  second = e;
911	if (!first)
912	  first = e;
913      }
914
  /* When there is no successor or only one choice, prediction is easy.

     We are lazy for now and predict only basic blocks with two outgoing
     edges.  It is possible to predict the generic case too, but we would
     have to ignore the first match heuristic and do more involved combining.
     Implement this later.  */
921  if (nedges != 2)
922    {
923      if (!bb->count)
924	set_even_probabilities (bb);
925      clear_bb_predictions (bb);
926      if (dump_file)
927	fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
928		 nedges, bb->index);
929      return;
930    }
931
932  if (dump_file)
933    fprintf (dump_file, "Predictions for bb %i\n", bb->index);
934
935  edge_prediction **preds = bb_predictions->get (bb);
936  if (preds)
937    {
      /* We implement the "first match" heuristic and use the probability
	 guessed by the predictor with the smallest index.  */
940      for (pred = *preds; pred; pred = pred->ep_next)
941	{
942	  enum br_predictor predictor = pred->ep_predictor;
943	  int probability = pred->ep_probability;
944
945	  if (pred->ep_edge != first)
946	    probability = REG_BR_PROB_BASE - probability;
947
948	  found = true;
	  /* The first match heuristic would be wildly confused if we
	     predicted both directions.  */
951	  if (best_predictor > predictor)
952	    {
953              struct edge_prediction *pred2;
954	      int prob = probability;
955
956	      for (pred2 = (struct edge_prediction *) *preds;
957		   pred2; pred2 = pred2->ep_next)
958	       if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
959	         {
	           int probability2 = pred2->ep_probability;
961
962		   if (pred2->ep_edge != first)
963		     probability2 = REG_BR_PROB_BASE - probability2;
964
965		   if ((probability < REG_BR_PROB_BASE / 2) !=
966		       (probability2 < REG_BR_PROB_BASE / 2))
967		     break;
968
969		   /* If the same predictor later gave better result, go for it! */
970		   if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
971		       || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
972		     prob = probability2;
973		 }
974	      if (!pred2)
975	        best_probability = prob, best_predictor = predictor;
976	    }
977
978	  d = (combined_probability * probability
979	       + (REG_BR_PROB_BASE - combined_probability)
980	       * (REG_BR_PROB_BASE - probability));
981
982	  /* Use FP math to avoid overflows of 32bit integers.  */
983	  if (d == 0)
984	    /* If one probability is 0% and one 100%, avoid division by zero.  */
985	    combined_probability = REG_BR_PROB_BASE / 2;
986	  else
987	    combined_probability = (((double) combined_probability)
988				    * probability
989		    		    * REG_BR_PROB_BASE / d + 0.5);
990	}
991    }
992
  /* Decide which heuristic to use.  If we didn't match anything, use the
     no_prediction heuristic; if we did, use either first match or
     Dempster-Shafer theory depending on the flags.  */
996
997  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
998    first_match = true;
999
1000  if (!found)
1001    dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
1002  else
1003    {
1004      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
1005		       !first_match);
1006      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
1007		       first_match);
1008    }
1009
1010  if (first_match)
1011    combined_probability = best_probability;
1012  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
1013
1014  if (preds)
1015    {
1016      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
1017	{
1018	  enum br_predictor predictor = pred->ep_predictor;
1019	  int probability = pred->ep_probability;
1020
1021	  if (pred->ep_edge != EDGE_SUCC (bb, 0))
1022	    probability = REG_BR_PROB_BASE - probability;
1023	  dump_prediction (dump_file, predictor, probability, bb,
1024			   !first_match || best_predictor == predictor);
1025	}
1026    }
1027  clear_bb_predictions (bb);
1028
1029  if (!bb->count)
1030    {
1031      first->probability = combined_probability;
1032      second->probability = REG_BR_PROB_BASE - combined_probability;
1033    }
1034}
1035
/* Check if T1 and T2 satisfy the IV_COMPARE condition.
   Return the SSA_NAME if the condition is satisfied, NULL otherwise.
1038
1039   T1 and T2 should be one of the following cases:
1040     1. T1 is SSA_NAME, T2 is NULL
1041     2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1042     3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */
1043
1044static tree
1045strips_small_constant (tree t1, tree t2)
1046{
1047  tree ret = NULL;
1048  int value = 0;
1049
1050  if (!t1)
1051    return NULL;
1052  else if (TREE_CODE (t1) == SSA_NAME)
1053    ret = t1;
1054  else if (tree_fits_shwi_p (t1))
1055    value = tree_to_shwi (t1);
1056  else
1057    return NULL;
1058
1059  if (!t2)
1060    return ret;
1061  else if (tree_fits_shwi_p (t2))
1062    value = tree_to_shwi (t2);
1063  else if (TREE_CODE (t2) == SSA_NAME)
1064    {
1065      if (ret)
1066        return NULL;
1067      else
1068        ret = t2;
1069    }
1070
1071  if (value <= 4 && value >= -4)
1072    return ret;
1073  else
1074    return NULL;
1075}
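
/* For illustration: for the operands of "i_1 + 3" the function above returns
   i_1; for "i_1 + 8" it returns NULL because the constant is outside [-4, 4];
   and for "i_1 + j_2" it returns NULL because both operands are SSA_NAMEs.  */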
1076
/* Return the SSA_NAME in T or in T's operands.
   Return NULL if no SSA_NAME can be found.  */
1079
1080static tree
1081get_base_value (tree t)
1082{
1083  if (TREE_CODE (t) == SSA_NAME)
1084    return t;
1085
1086  if (!BINARY_CLASS_P (t))
1087    return NULL;
1088
1089  switch (TREE_OPERAND_LENGTH (t))
1090    {
1091    case 1:
1092      return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1093    case 2:
1094      return strips_small_constant (TREE_OPERAND (t, 0),
1095				    TREE_OPERAND (t, 1));
1096    default:
1097      return NULL;
1098    }
1099}
1100
/* Check the compare STMT in LOOP.  If it compares an induction
   variable to a loop invariant, return true, and save
   LOOP_INVARIANT, COMPARE_CODE, LOOP_STEP and LOOP_IV_BASE.
   Otherwise return false and set LOOP_INVARIANT to NULL.  */
1105
1106static bool
1107is_comparison_with_loop_invariant_p (gimple stmt, struct loop *loop,
1108				     tree *loop_invariant,
1109				     enum tree_code *compare_code,
1110				     tree *loop_step,
1111				     tree *loop_iv_base)
1112{
1113  tree op0, op1, bound, base;
1114  affine_iv iv0, iv1;
1115  enum tree_code code;
1116  tree step;
1117
1118  code = gimple_cond_code (stmt);
1119  *loop_invariant = NULL;
1120
1121  switch (code)
1122    {
1123    case GT_EXPR:
1124    case GE_EXPR:
1125    case NE_EXPR:
1126    case LT_EXPR:
1127    case LE_EXPR:
1128    case EQ_EXPR:
1129      break;
1130
1131    default:
1132      return false;
1133    }
1134
1135  op0 = gimple_cond_lhs (stmt);
1136  op1 = gimple_cond_rhs (stmt);
1137
1138  if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
1139       || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
1140    return false;
1141  if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
1142    return false;
1143  if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
1144    return false;
1145  if (TREE_CODE (iv0.step) != INTEGER_CST
1146      || TREE_CODE (iv1.step) != INTEGER_CST)
1147    return false;
1148  if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
1149      || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
1150    return false;
1151
1152  if (integer_zerop (iv0.step))
1153    {
1154      if (code != NE_EXPR && code != EQ_EXPR)
1155	code = invert_tree_comparison (code, false);
1156      bound = iv0.base;
1157      base = iv1.base;
1158      if (tree_fits_shwi_p (iv1.step))
1159	step = iv1.step;
1160      else
1161	return false;
1162    }
1163  else
1164    {
1165      bound = iv1.base;
1166      base = iv0.base;
1167      if (tree_fits_shwi_p (iv0.step))
1168	step = iv0.step;
1169      else
1170	return false;
1171    }
1172
1173  if (TREE_CODE (bound) != INTEGER_CST)
1174    bound = get_base_value (bound);
1175  if (!bound)
1176    return false;
1177  if (TREE_CODE (base) != INTEGER_CST)
1178    base = get_base_value (base);
1179  if (!base)
1180    return false;
1181
1182  *loop_invariant = bound;
1183  *compare_code = code;
1184  *loop_step = step;
1185  *loop_iv_base = base;
1186  return true;
1187}
1188
1189/* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */
1190
1191static bool
1192expr_coherent_p (tree t1, tree t2)
1193{
1194  gimple stmt;
1195  tree ssa_name_1 = NULL;
1196  tree ssa_name_2 = NULL;
1197
1198  gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1199  gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1200
1201  if (t1 == t2)
1202    return true;
1203
1204  if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1205    return true;
1206  if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1207    return false;
1208
1209  /* Check to see if t1 is expressed/defined with t2.  */
1210  stmt = SSA_NAME_DEF_STMT (t1);
1211  gcc_assert (stmt != NULL);
1212  if (is_gimple_assign (stmt))
1213    {
1214      ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1215      if (ssa_name_1 && ssa_name_1 == t2)
1216	return true;
1217    }
1218
1219  /* Check to see if t2 is expressed/defined with t1.  */
1220  stmt = SSA_NAME_DEF_STMT (t2);
1221  gcc_assert (stmt != NULL);
1222  if (is_gimple_assign (stmt))
1223    {
1224      ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1225      if (ssa_name_2 && ssa_name_2 == t1)
1226	return true;
1227    }
1228
1229  /* Compare if t1 and t2's def_stmts are identical.  */
1230  if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1231    return true;
1232  else
1233    return false;
1234}
1235
/* Predict the branch probability of BB when BB contains a branch that compares
   an induction variable in LOOP (with base LOOP_IV_BASE_VAR) to LOOP_BOUND_VAR.
   The loop exit condition uses LOOP_BOUND_CODE, with a step of LOOP_BOUND_STEP.
1239
1240   E.g.
1241     for (int i = 0; i < bound; i++) {
1242       if (i < bound - 2)
1243	 computation_1();
1244       else
1245	 computation_2();
1246     }
1247
1248  In this loop, we will predict the branch inside the loop to be taken.  */
1249
1250static void
1251predict_iv_comparison (struct loop *loop, basic_block bb,
1252		       tree loop_bound_var,
1253		       tree loop_iv_base_var,
1254		       enum tree_code loop_bound_code,
1255		       int loop_bound_step)
1256{
1257  gimple stmt;
1258  tree compare_var, compare_base;
1259  enum tree_code compare_code;
1260  tree compare_step_var;
1261  edge then_edge;
1262  edge_iterator ei;
1263
1264  if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1265      || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
1266      || predicted_by_p (bb, PRED_LOOP_EXIT))
1267    return;
1268
1269  stmt = last_stmt (bb);
1270  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1271    return;
1272  if (!is_comparison_with_loop_invariant_p (stmt, loop, &compare_var,
1273					    &compare_code,
1274					    &compare_step_var,
1275					    &compare_base))
1276    return;
1277
1278  /* Find the taken edge.  */
1279  FOR_EACH_EDGE (then_edge, ei, bb->succs)
1280    if (then_edge->flags & EDGE_TRUE_VALUE)
1281      break;
1282
1283  /* When comparing an IV to a loop invariant, NE is more likely to be
1284     taken while EQ is more likely to be not-taken.  */
1285  if (compare_code == NE_EXPR)
1286    {
1287      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1288      return;
1289    }
1290  else if (compare_code == EQ_EXPR)
1291    {
1292      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1293      return;
1294    }
1295
1296  if (!expr_coherent_p (loop_iv_base_var, compare_base))
1297    return;
1298
1299  /* If loop bound, base and compare bound are all constants, we can
1300     calculate the probability directly.  */
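  /* For illustration: for "for (i = 0; i < 100; i++) if (i < 30) ..."
     loop_count is 100 and compare_count is 30, so the inner branch is
     predicted taken with a probability of about 30%.  */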
1301  if (tree_fits_shwi_p (loop_bound_var)
1302      && tree_fits_shwi_p (compare_var)
1303      && tree_fits_shwi_p (compare_base))
1304    {
1305      int probability;
1306      bool overflow, overall_overflow = false;
1307      widest_int compare_count, tem;
1308
1309      /* (loop_bound - base) / compare_step */
1310      tem = wi::sub (wi::to_widest (loop_bound_var),
1311		     wi::to_widest (compare_base), SIGNED, &overflow);
1312      overall_overflow |= overflow;
1313      widest_int loop_count = wi::div_trunc (tem,
1314					     wi::to_widest (compare_step_var),
1315					     SIGNED, &overflow);
1316      overall_overflow |= overflow;
1317
1318      if (!wi::neg_p (wi::to_widest (compare_step_var))
1319          ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
1320	{
1321	  /* (loop_bound - compare_bound) / compare_step */
1322	  tem = wi::sub (wi::to_widest (loop_bound_var),
1323			 wi::to_widest (compare_var), SIGNED, &overflow);
1324	  overall_overflow |= overflow;
1325	  compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1326					 SIGNED, &overflow);
1327	  overall_overflow |= overflow;
1328	}
1329      else
1330        {
1331	  /* (compare_bound - base) / compare_step */
1332	  tem = wi::sub (wi::to_widest (compare_var),
1333			 wi::to_widest (compare_base), SIGNED, &overflow);
1334	  overall_overflow |= overflow;
1335          compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
1336					 SIGNED, &overflow);
1337	  overall_overflow |= overflow;
1338	}
1339      if (compare_code == LE_EXPR || compare_code == GE_EXPR)
1340	++compare_count;
1341      if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
1342	++loop_count;
1343      if (wi::neg_p (compare_count))
1344        compare_count = 0;
1345      if (wi::neg_p (loop_count))
1346        loop_count = 0;
1347      if (loop_count == 0)
1348	probability = 0;
1349      else if (wi::cmps (compare_count, loop_count) == 1)
1350	probability = REG_BR_PROB_BASE;
1351      else
1352        {
1353	  tem = compare_count * REG_BR_PROB_BASE;
1354	  tem = wi::udiv_trunc (tem, loop_count);
1355	  probability = tem.to_uhwi ();
1356	}
1357
1358      if (!overall_overflow)
1359        predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
1360
1361      return;
1362    }
1363
1364  if (expr_coherent_p (loop_bound_var, compare_var))
1365    {
1366      if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
1367	  && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1368	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1369      else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
1370	       && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1371	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1372      else if (loop_bound_code == NE_EXPR)
1373	{
1374	  /* If the loop backedge condition is "(i != bound)", we do
1375	     the comparison based on the step of IV:
1376	     * step < 0 : backedge condition is like (i > bound)
1377	     * step > 0 : backedge condition is like (i < bound)  */
1378	  gcc_assert (loop_bound_step != 0);
1379	  if (loop_bound_step > 0
1380	      && (compare_code == LT_EXPR
1381		  || compare_code == LE_EXPR))
1382	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1383	  else if (loop_bound_step < 0
1384		   && (compare_code == GT_EXPR
1385		       || compare_code == GE_EXPR))
1386	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1387	  else
1388	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1389	}
1390      else
	/* The branch is predicted not-taken if loop_bound_code is
	   the opposite of compare_code.  */
1393	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1394    }
1395  else if (expr_coherent_p (loop_iv_base_var, compare_var))
1396    {
1397      /* For cases like:
1398	   for (i = s; i < h; i++)
1399	     if (i > s + 2) ....
1400	 The branch should be predicted taken.  */
1401      if (loop_bound_step > 0
1402	  && (compare_code == GT_EXPR || compare_code == GE_EXPR))
1403	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1404      else if (loop_bound_step < 0
1405	       && (compare_code == LT_EXPR || compare_code == LE_EXPR))
1406	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
1407      else
1408	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
1409    }
1410}
1411
/* Predict extra loop exits that will lead to EXIT_EDGE.  The extra loop
   exits result from short-circuit conditions that generate an
   if_tmp.  E.g.:
1415
1416   if (foo() || global > 10)
1417     break;
1418
1419   This will be translated into:
1420
1421   BB3:
1422     loop header...
1423   BB4:
1424     if foo() goto BB6 else goto BB5
1425   BB5:
1426     if global > 10 goto BB6 else goto BB7
1427   BB6:
1428     goto BB7
1429   BB7:
1430     iftmp = (PHI 0(BB5), 1(BB6))
1431     if iftmp == 1 goto BB8 else goto BB3
1432   BB8:
1433     outside of the loop...
1434
1435   The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
1436   From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1437   exits. This function takes BB7->BB8 as input, and finds out the extra loop
1438   exits to predict them using PRED_LOOP_EXIT.  */
1439
1440static void
1441predict_extra_loop_exits (edge exit_edge)
1442{
1443  unsigned i;
1444  bool check_value_one;
1445  gimple phi_stmt;
1446  tree cmp_rhs, cmp_lhs;
1447  gimple cmp_stmt = last_stmt (exit_edge->src);
1448
1449  if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1450    return;
1451  cmp_rhs = gimple_cond_rhs (cmp_stmt);
1452  cmp_lhs = gimple_cond_lhs (cmp_stmt);
1453  if (!TREE_CONSTANT (cmp_rhs)
1454      || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1455    return;
1456  if (TREE_CODE (cmp_lhs) != SSA_NAME)
1457    return;
1458
1459  /* If check_value_one is true, only the phi_args with value '1' will lead
1460     to loop exit. Otherwise, only the phi_args with value '0' will lead to
1461     loop exit.  */
1462  check_value_one = (((integer_onep (cmp_rhs))
1463		    ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1464		    ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
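
  /* In the example above ("if (iftmp == 1) goto BB8"): cmp_rhs is 1, the
     condition code is EQ_EXPR and the exit edge is the true edge, so
     check_value_one is true and only the PHI arguments with value 1 (the
     path through BB6) lead to the loop exit.  */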
1465
1466  phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1467  if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI)
1468    return;
1469
1470  for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1471    {
1472      edge e1;
1473      edge_iterator ei;
1474      tree val = gimple_phi_arg_def (phi_stmt, i);
1475      edge e = gimple_phi_arg_edge (phi_stmt, i);
1476
1477      if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1478	continue;
1479      if ((check_value_one ^ integer_onep (val)) == 1)
1480	continue;
1481      if (EDGE_COUNT (e->src->succs) != 1)
1482	{
1483	  predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
1484	  continue;
1485	}
1486
1487      FOR_EACH_EDGE (e1, ei, e->src->preds)
1488	predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
1489    }
1490}
1491
1492/* Predict edge probabilities by exploiting loop structure.  */
1493
1494static void
1495predict_loops (void)
1496{
1497  struct loop *loop;
1498
  /* Walk the natural loops and apply the loop heuristics: predict the
     exit edges, the back edge and comparisons against induction
     variables.  */
1501  FOR_EACH_LOOP (loop, 0)
1502    {
1503      basic_block bb, *bbs;
1504      unsigned j, n_exits;
1505      vec<edge> exits;
1506      struct tree_niter_desc niter_desc;
1507      edge ex;
1508      struct nb_iter_bound *nb_iter;
1509      enum tree_code loop_bound_code = ERROR_MARK;
1510      tree loop_bound_step = NULL;
1511      tree loop_bound_var = NULL;
1512      tree loop_iv_base = NULL;
1513      gimple stmt = NULL;
1514
1515      exits = get_loop_exit_edges (loop);
1516      n_exits = exits.length ();
1517      if (!n_exits)
1518	{
1519          exits.release ();
1520	  continue;
1521	}
1522
1523      FOR_EACH_VEC_ELT (exits, j, ex)
1524	{
1525	  tree niter = NULL;
1526	  HOST_WIDE_INT nitercst;
1527	  int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
1528	  int probability;
1529	  enum br_predictor predictor;
1530
1531	  predict_extra_loop_exits (ex);
1532
1533	  if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
1534	    niter = niter_desc.niter;
1535	  if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
1536	    niter = loop_niter_by_eval (loop, ex);
1537
1538	  if (TREE_CODE (niter) == INTEGER_CST)
1539	    {
1540	      if (tree_fits_uhwi_p (niter)
1541		  && max
1542		  && compare_tree_int (niter, max - 1) == -1)
1543		nitercst = tree_to_uhwi (niter) + 1;
1544	      else
1545		nitercst = max;
1546	      predictor = PRED_LOOP_ITERATIONS;
1547	    }
1548	  /* If we have just one exit and we can derive some information about
1549	     the number of iterations of the loop from the statements inside
1550	     the loop, use it to predict this exit.  */
1551	  else if (n_exits == 1)
1552	    {
1553	      nitercst = estimated_stmt_executions_int (loop);
1554	      if (nitercst < 0)
1555		continue;
1556	      if (nitercst > max)
1557		nitercst = max;
1558
1559	      predictor = PRED_LOOP_ITERATIONS_GUESSED;
1560	    }
1561	  else
1562	    continue;
1563
1564	  /* If the prediction for number of iterations is zero, do not
1565	     predict the exit edges.  */
1566	  if (nitercst == 0)
1567	    continue;
1568
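	  /* The exit is expected to be taken once per NITERCST iterations:
	     e.g. a loop predicted to iterate about 10 times gets an exit
	     probability of roughly 10% (in REG_BR_PROB_BASE units, rounded
	     to the nearest unit).  */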
1569	  probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
1570	  predict_edge (ex, predictor, probability);
1571	}
1572      exits.release ();
1573
1574      /* Find information about loop bound variables.  */
1575      for (nb_iter = loop->bounds; nb_iter;
1576	   nb_iter = nb_iter->next)
1577	if (nb_iter->stmt
1578	    && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1579	  {
1580	    stmt = nb_iter->stmt;
1581	    break;
1582	  }
1583      if (!stmt && last_stmt (loop->header)
1584	  && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1585	stmt = last_stmt (loop->header);
1586      if (stmt)
1587	is_comparison_with_loop_invariant_p (stmt, loop,
1588					     &loop_bound_var,
1589					     &loop_bound_code,
1590					     &loop_bound_step,
1591					     &loop_iv_base);
1592
1593      bbs = get_loop_body (loop);
1594
1595      for (j = 0; j < loop->num_nodes; j++)
1596	{
1597	  int header_found = 0;
1598	  edge e;
1599	  edge_iterator ei;
1600
1601	  bb = bbs[j];
1602
	  /* Bypass the loop heuristics on continue statements.  These
	     statements construct loops via "non-loop" constructs
	     in the source language and are better handled
	     separately.  */
1607	  if (predicted_by_p (bb, PRED_CONTINUE))
1608	    continue;
1609
1610	  /* Loop branch heuristics - predict an edge back to a
1611	     loop's head as taken.  */
1612	  if (bb == loop->latch)
1613	    {
1614	      e = find_edge (loop->latch, loop->header);
1615	      if (e)
1616		{
1617		  header_found = 1;
1618		  predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
1619		}
1620	    }
1621
	  /* Loop exit heuristics - predict as not taken an edge exiting the
	     loop if the conditional has no successor in the loop header.  */
1624	  if (!header_found
1625	      /* If we already used more reliable loop exit predictors, do not
1626		 bother with PRED_LOOP_EXIT.  */
1627	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1628	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
1629	    {
	      /* For a loop with many exits we don't want to predict all of
		 them with a fairly large probability, because if all the
		 exits are considered in a row, the loop would be predicted
		 to iterate almost never.  The code dividing the probability
		 by the number of exits is very rough.  It should compute the
		 number of exits taken in each path through the function (not
		 the overall number of exits, which might be a lot higher for
		 loops with wide switch statements in them) and take the n-th
		 root.

		 We limit the minimal probability to 2% to keep
		 EDGE_PROBABILITY_RELIABLE from trusting the branch
		 prediction, as this was causing a regression in the perl
		 benchmark, which contains such a wide loop.  */
1643
1644	      int probability = ((REG_BR_PROB_BASE
1645		                  - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1646				 / n_exits);
1647	      if (probability < HITRATE (2))
1648		probability = HITRATE (2);
1649	      FOR_EACH_EDGE (e, ei, bb->succs)
1650		if (e->dest->index < NUM_FIXED_BLOCKS
1651		    || !flow_bb_inside_loop_p (loop, e->dest))
1652		  predict_edge (e, PRED_LOOP_EXIT, probability);
1653	    }
1654	  if (loop_bound_var)
1655	    predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1656				   loop_bound_code,
1657				   tree_to_shwi (loop_bound_step));
1658	}
1659
1660      /* Free basic blocks from get_loop_body.  */
1661      free (bbs);
1662    }
1663}
1664
1665/* Attempt to predict probabilities of BB outgoing edges using local
1666   properties.  */
1667static void
1668bb_estimate_probability_locally (basic_block bb)
1669{
1670  rtx_insn *last_insn = BB_END (bb);
1671  rtx cond;
1672
1673  if (! can_predict_insn_p (last_insn))
1674    return;
1675  cond = get_condition (last_insn, NULL, false, false);
1676  if (! cond)
1677    return;
1678
1679  /* Try "pointer heuristic."
1680     A comparison ptr == 0 is predicted as false.
1681     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
1682  if (COMPARISON_P (cond)
1683      && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1684	  || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1685    {
1686      if (GET_CODE (cond) == EQ)
1687	predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1688      else if (GET_CODE (cond) == NE)
1689	predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1690    }
1691  else
1692
1693  /* Try "opcode heuristic."
1694     EQ tests are usually false and NE tests are usually true. Also,
1695     most quantities are positive, so we can make the appropriate guesses
1696     about signed comparisons against zero.  */
1697    switch (GET_CODE (cond))
1698      {
1699      case CONST_INT:
1700	/* Unconditional branch.  */
1701	predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1702			  cond == const0_rtx ? NOT_TAKEN : TAKEN);
1703	break;
1704
1705      case EQ:
1706      case UNEQ:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of equality
	   tests in FP code.  */
1710	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1711	  ;
1712	/* Comparisons with 0 are often used for booleans and there is
1713	   nothing useful to predict about them.  */
1714	else if (XEXP (cond, 1) == const0_rtx
1715		 || XEXP (cond, 0) == const0_rtx)
1716	  ;
1717	else
1718	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1719	break;
1720
1721      case NE:
1722      case LTGT:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of equality
	   tests in FP code.  */
1726	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1727	  ;
1728	/* Comparisons with 0 are often used for booleans and there is
1729	   nothing useful to predict about them.  */
1730	else if (XEXP (cond, 1) == const0_rtx
1731		 || XEXP (cond, 0) == const0_rtx)
1732	  ;
1733	else
1734	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1735	break;
1736
1737      case ORDERED:
1738	predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1739	break;
1740
1741      case UNORDERED:
1742	predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1743	break;
1744
1745      case LE:
1746      case LT:
1747	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1748	    || XEXP (cond, 1) == constm1_rtx)
1749	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1750	break;
1751
1752      case GE:
1753      case GT:
1754	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1755	    || XEXP (cond, 1) == constm1_rtx)
1756	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1757	break;
1758
1759      default:
1760	break;
1761      }
1762}
1763
1764/* Set edge->probability for each successor edge of BB.  */
1765void
1766guess_outgoing_edge_probabilities (basic_block bb)
1767{
1768  bb_estimate_probability_locally (bb);
1769  combine_predictions_for_insn (BB_END (bb), bb);
1770}
1771
1772static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);
1773
1774/* Helper function for expr_expected_value.  */
1775
1776static tree
1777expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1778		       tree op1, bitmap visited, enum br_predictor *predictor)
1779{
1780  gimple def;
1781
1782  if (predictor)
1783    *predictor = PRED_UNCONDITIONAL;
1784
1785  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1786    {
1787      if (TREE_CONSTANT (op0))
1788	return op0;
1789
1790      if (code != SSA_NAME)
1791	return NULL_TREE;
1792
1793      def = SSA_NAME_DEF_STMT (op0);
1794
1795      /* If we were already here, break the infinite cycle.  */
1796      if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1797	return NULL;
1798
1799      if (gimple_code (def) == GIMPLE_PHI)
1800	{
	  /* All the arguments of the PHI node must have the same expected
	     constant value.  */
1803	  int i, n = gimple_phi_num_args (def);
1804	  tree val = NULL, new_val;
1805
1806	  for (i = 0; i < n; i++)
1807	    {
1808	      tree arg = PHI_ARG_DEF (def, i);
1809	      enum br_predictor predictor2;
1810
	      /* If this PHI has itself as an argument, we cannot
		 determine the expected value of this argument.  However,
		 if we can find an expected constant value for the other
		 PHI args then we can still be sure that this is
		 likely a constant.  So be optimistic and just
		 continue with the next argument.  */
1817	      if (arg == PHI_RESULT (def))
1818		continue;
1819
1820	      new_val = expr_expected_value (arg, visited, &predictor2);
1821
	      /* It is difficult to combine value predictors.  Simply assume
		 that the later predictor is weaker and take its prediction.  */
1824	      if (predictor && *predictor < predictor2)
1825		*predictor = predictor2;
1826	      if (!new_val)
1827		return NULL;
1828	      if (!val)
1829		val = new_val;
1830	      else if (!operand_equal_p (val, new_val, false))
1831		return NULL;
1832	    }
1833	  return val;
1834	}
1835      if (is_gimple_assign (def))
1836	{
1837	  if (gimple_assign_lhs (def) != op0)
1838	    return NULL;
1839
1840	  return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
1841					gimple_assign_rhs1 (def),
1842					gimple_assign_rhs_code (def),
1843					gimple_assign_rhs2 (def),
1844					visited, predictor);
1845	}
1846
1847      if (is_gimple_call (def))
1848	{
1849	  tree decl = gimple_call_fndecl (def);
1850	  if (!decl)
1851	    {
1852	      if (gimple_call_internal_p (def)
1853		  && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
1854		{
1855		  gcc_assert (gimple_call_num_args (def) == 3);
1856		  tree val = gimple_call_arg (def, 0);
1857		  if (TREE_CONSTANT (val))
1858		    return val;
1859		  if (predictor)
1860		    {
1861		      tree val2 = gimple_call_arg (def, 2);
1862		      gcc_assert (TREE_CODE (val2) == INTEGER_CST
1863				  && tree_fits_uhwi_p (val2)
1864				  && tree_to_uhwi (val2) < END_PREDICTORS);
1865		      *predictor = (enum br_predictor) tree_to_uhwi (val2);
1866		    }
1867		  return gimple_call_arg (def, 1);
1868		}
1869	      return NULL;
1870	    }
1871	  if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
1872	    switch (DECL_FUNCTION_CODE (decl))
1873	      {
1874	      case BUILT_IN_EXPECT:
1875		{
1876		  tree val;
1877		  if (gimple_call_num_args (def) != 2)
1878		    return NULL;
1879		  val = gimple_call_arg (def, 0);
1880		  if (TREE_CONSTANT (val))
1881		    return val;
1882		  if (predictor)
1883		    *predictor = PRED_BUILTIN_EXPECT;
1884		  return gimple_call_arg (def, 1);
1885		}
1886
1887	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
1888	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
1889	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
1890	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
1891	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
1892	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
1893	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
1894	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
1895	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
1896	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
1897	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
1898	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
1899	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
1900		/* Assume that any given atomic operation has low contention,
1901		   and thus the compare-and-swap operation succeeds.  */
1902		if (predictor)
1903		  *predictor = PRED_COMPARE_AND_SWAP;
1904		return boolean_true_node;
1905	    }
1906	}
1907
1908      return NULL;
1909    }
1910
1911  if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
1912    {
1913      tree res;
1914      enum br_predictor predictor2;
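      /* The expected value of a binary operation is the folding of the
	 expected values of its operands; of the two operand predictors,
	 keep the weaker (numerically larger) one.  */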
1915      op0 = expr_expected_value (op0, visited, predictor);
1916      if (!op0)
1917	return NULL;
1918      op1 = expr_expected_value (op1, visited, &predictor2);
1919      if (predictor && *predictor < predictor2)
1920	*predictor = predictor2;
1921      if (!op1)
1922	return NULL;
1923      res = fold_build2 (code, type, op0, op1);
1924      if (TREE_CONSTANT (res))
1925	return res;
1926      return NULL;
1927    }
1928  if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
1929    {
1930      tree res;
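      /* Likewise for a unary operation: fold the expected value of its
	 single operand.  */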
1931      op0 = expr_expected_value (op0, visited, predictor);
1932      if (!op0)
1933	return NULL;
1934      res = fold_build1 (code, type, op0);
1935      if (TREE_CONSTANT (res))
1936	return res;
1937      return NULL;
1938    }
1939  return NULL;
1940}
1941
/* Return the constant EXPR is likely to have at execution time, or NULL
   if unknown.  The function is used by the builtin_expect branch predictor,
   so the evidence must come from this construct and additional possible
   constant folding.

   We may want to implement a more involved value guess (such as value range
   propagation based prediction), but such tricks shall go to a new
   implementation.  */
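/* For instance (an illustrative snippet only), given

     tmp = __builtin_expect (cond, 1);
     if (tmp)
       ...

   the expected value of TMP, and thus of the condition, is the constant 1,
   predicted with PRED_BUILTIN_EXPECT.  */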
1949
1950static tree
1951expr_expected_value (tree expr, bitmap visited,
1952		     enum br_predictor *predictor)
1953{
1954  enum tree_code code;
1955  tree op0, op1;
1956
1957  if (TREE_CONSTANT (expr))
1958    {
1959      if (predictor)
1960	*predictor = PRED_UNCONDITIONAL;
1961      return expr;
1962    }
1963
1964  extract_ops_from_tree (expr, &code, &op0, &op1);
1965  return expr_expected_value_1 (TREE_TYPE (expr),
1966				op0, code, op1, visited, predictor);
1967}
1968
1969/* Predict using opcode of the last statement in basic block.  */
1970static void
1971tree_predict_by_opcode (basic_block bb)
1972{
1973  gimple stmt = last_stmt (bb);
1974  edge then_edge;
1975  tree op0, op1;
1976  tree type;
1977  tree val;
1978  enum tree_code cmp;
1979  bitmap visited;
1980  edge_iterator ei;
1981  enum br_predictor predictor;
1982
1983  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
1984    return;
1985  FOR_EACH_EDGE (then_edge, ei, bb->succs)
1986    if (then_edge->flags & EDGE_TRUE_VALUE)
1987      break;
1988  op0 = gimple_cond_lhs (stmt);
1989  op1 = gimple_cond_rhs (stmt);
1990  cmp = gimple_cond_code (stmt);
1991  type = TREE_TYPE (op0);
1992  visited = BITMAP_ALLOC (NULL);
1993  val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,
1994			       &predictor);
1995  BITMAP_FREE (visited);
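  /* If the conditional has a known expected value, predict the THEN edge
     accordingly.  Predictions coming from __builtin_expect use the
     probability given by --param builtin-expect-probability.  */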
1996  if (val && TREE_CODE (val) == INTEGER_CST)
1997    {
1998      if (predictor == PRED_BUILTIN_EXPECT)
1999	{
2000	  int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
2001
2002	  gcc_assert (percent >= 0 && percent <= 100);
2003	  if (integer_zerop (val))
2004	    percent = 100 - percent;
2005	  predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
2006	}
2007      else
2008	predict_edge (then_edge, predictor,
2009		      integer_zerop (val) ? NOT_TAKEN : TAKEN);
2010    }
2011  /* Try "pointer heuristic."
2012     A comparison ptr == 0 is predicted as false.
2013     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
2014  if (POINTER_TYPE_P (type))
2015    {
2016      if (cmp == EQ_EXPR)
2017	predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
2018      else if (cmp == NE_EXPR)
2019	predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
2020    }
2021  else
2022
2023  /* Try "opcode heuristic."
2024     EQ tests are usually false and NE tests are usually true. Also,
2025     most quantities are positive, so we can make the appropriate guesses
2026     about signed comparisons against zero.  */
2027    switch (cmp)
2028      {
2029      case EQ_EXPR:
2030      case UNEQ_EXPR:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of = tests in
	   FP code.  */
2034	if (FLOAT_TYPE_P (type))
2035	  ;
2036	/* Comparisons with 0 are often used for booleans and there is
2037	   nothing useful to predict about them.  */
2038	else if (integer_zerop (op0) || integer_zerop (op1))
2039	  ;
2040	else
2041	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2042	break;
2043
2044      case NE_EXPR:
2045      case LTGT_EXPR:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of = tests in
	   FP code.  */
2049	if (FLOAT_TYPE_P (type))
2050	  ;
2051	/* Comparisons with 0 are often used for booleans and there is
2052	   nothing useful to predict about them.  */
2053	else if (integer_zerop (op0)
2054		 || integer_zerop (op1))
2055	  ;
2056	else
2057	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2058	break;
2059
2060      case ORDERED_EXPR:
2061	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2062	break;
2063
2064      case UNORDERED_EXPR:
2065	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2066	break;
2067
2068      case LE_EXPR:
2069      case LT_EXPR:
2070	if (integer_zerop (op1)
2071	    || integer_onep (op1)
2072	    || integer_all_onesp (op1)
2073	    || real_zerop (op1)
2074	    || real_onep (op1)
2075	    || real_minus_onep (op1))
2076	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2077	break;
2078
2079      case GE_EXPR:
2080      case GT_EXPR:
2081	if (integer_zerop (op1)
2082	    || integer_onep (op1)
2083	    || integer_all_onesp (op1)
2084	    || real_zerop (op1)
2085	    || real_onep (op1)
2086	    || real_minus_onep (op1))
2087	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2088	break;
2089
2090      default:
2091	break;
2092      }
2093}
2094
/* Try to guess whether the return value represents an error code.  */
2096
2097static enum br_predictor
2098return_prediction (tree val, enum prediction *prediction)
2099{
2100  /* VOID.  */
2101  if (!val)
2102    return PRED_NO_PREDICTION;
2103  /* Different heuristics for pointers and scalars.  */
2104  if (POINTER_TYPE_P (TREE_TYPE (val)))
2105    {
2106      /* NULL is usually not returned.  */
2107      if (integer_zerop (val))
2108	{
2109	  *prediction = NOT_TAKEN;
2110	  return PRED_NULL_RETURN;
2111	}
2112    }
2113  else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
2114    {
2115      /* Negative return values are often used to indicate
2116         errors.  */
2117      if (TREE_CODE (val) == INTEGER_CST
2118	  && tree_int_cst_sgn (val) < 0)
2119	{
2120	  *prediction = NOT_TAKEN;
2121	  return PRED_NEGATIVE_RETURN;
2122	}
      /* Constant return values seem to be commonly taken.
         Zero/one often represent booleans so exclude them from the
	 heuristics.  */
2126      if (TREE_CONSTANT (val)
2127	  && (!integer_zerop (val) && !integer_onep (val)))
2128	{
2129	  *prediction = TAKEN;
2130	  return PRED_CONST_RETURN;
2131	}
2132    }
2133  return PRED_NO_PREDICTION;
2134}
2135
/* Find the basic block with the return expression and look up its possible
   return value, trying to apply the RETURN_PREDICTION heuristics.  */
2138static void
2139apply_return_prediction (void)
2140{
2141  gimple return_stmt = NULL;
2142  tree return_val;
2143  edge e;
2144  gimple phi;
2145  int phi_num_args, i;
2146  enum br_predictor pred;
2147  enum prediction direction;
2148  edge_iterator ei;
2149
2150  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2151    {
2152      return_stmt = last_stmt (e->src);
2153      if (return_stmt
2154	  && gimple_code (return_stmt) == GIMPLE_RETURN)
2155	break;
2156    }
2157  if (!e)
2158    return;
2159  return_val = gimple_return_retval (return_stmt);
2160  if (!return_val)
2161    return;
2162  if (TREE_CODE (return_val) != SSA_NAME
2163      || !SSA_NAME_DEF_STMT (return_val)
2164      || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
2165    return;
2166  phi = SSA_NAME_DEF_STMT (return_val);
2167  phi_num_args = gimple_phi_num_args (phi);
2168  pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
2169
  /* Avoid the degenerate case where all return values from the function
     belong to the same category (i.e. they are all positive constants)
     so we can hardly say anything about them.  */
2173  for (i = 1; i < phi_num_args; i++)
2174    if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
2175      break;
2176  if (i != phi_num_args)
2177    for (i = 0; i < phi_num_args; i++)
2178      {
2179	pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
2180	if (pred != PRED_NO_PREDICTION)
2181	  predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
2182				         direction);
2183      }
2184}
2185
/* Look for basic blocks that contain unlikely-to-happen events
   (such as noreturn calls) and mark all paths leading to execution
   of such basic blocks as unlikely.  */
2189
2190static void
2191tree_bb_level_predictions (void)
2192{
2193  basic_block bb;
2194  bool has_return_edges = false;
2195  edge e;
2196  edge_iterator ei;
2197
2198  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2199    if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
2200      {
2201        has_return_edges = true;
2202	break;
2203      }
2204
2205  apply_return_prediction ();
2206
2207  FOR_EACH_BB_FN (bb, cfun)
2208    {
2209      gimple_stmt_iterator gsi;
2210
2211      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2212	{
2213	  gimple stmt = gsi_stmt (gsi);
2214	  tree decl;
2215
2216	  if (is_gimple_call (stmt))
2217	    {
2218	      if ((gimple_call_flags (stmt) & ECF_NORETURN)
2219	          && has_return_edges)
2220		predict_paths_leading_to (bb, PRED_NORETURN,
2221					  NOT_TAKEN);
2222	      decl = gimple_call_fndecl (stmt);
2223	      if (decl
2224		  && lookup_attribute ("cold",
2225				       DECL_ATTRIBUTES (decl)))
2226		predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
2227					  NOT_TAKEN);
2228	    }
2229	  else if (gimple_code (stmt) == GIMPLE_PREDICT)
2230	    {
2231	      predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
2232					gimple_predict_outcome (stmt));
2233	      /* Keep GIMPLE_PREDICT around so early inlining will propagate
2234	         hints to callers.  */
2235	    }
2236	}
2237    }
2238}
2239
2240#ifdef ENABLE_CHECKING
2241
2242/* Callback for hash_map::traverse, asserts that the pointer map is
2243   empty.  */
2244
2245bool
2246assert_is_empty (const_basic_block const &, edge_prediction *const &value,
2247		 void *)
2248{
2249  gcc_assert (!value);
2250  return false;
2251}
2252#endif
2253
2254/* Predict branch probabilities and estimate profile for basic block BB.  */
2255
2256static void
2257tree_estimate_probability_bb (basic_block bb)
2258{
2259  edge e;
2260  edge_iterator ei;
2261  gimple last;
2262
2263  FOR_EACH_EDGE (e, ei, bb->succs)
2264    {
2265      /* Predict edges to user labels with attributes.  */
2266      if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
2267	{
2268	  gimple_stmt_iterator gi;
2269	  for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
2270	    {
2271	      gimple stmt = gsi_stmt (gi);
2272	      tree decl;
2273
2274	      if (gimple_code (stmt) != GIMPLE_LABEL)
2275		break;
2276	      decl = gimple_label_label (stmt);
2277	      if (DECL_ARTIFICIAL (decl))
2278		continue;
2279
2280	      /* Finally, we have a user-defined label.  */
2281	      if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
2282		predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
2283	      else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
2284		predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
2285	    }
2286	}
2287
      /* Predict early returns to be probable, as we've already taken
	 care of error returns and other cases are often used for
	 fast paths through the function.
2291
2292	 Since we've already removed the return statements, we are
2293	 looking for CFG like:
2294
2295	 if (conditional)
2296	 {
2297	 ..
2298	 goto return_block
2299	 }
2300	 some other blocks
2301	 return_block:
2302	 return_stmt.  */
2303      if (e->dest != bb->next_bb
2304	  && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2305	  && single_succ_p (e->dest)
2306	  && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2307	  && (last = last_stmt (e->dest)) != NULL
2308	  && gimple_code (last) == GIMPLE_RETURN)
2309	{
2310	  edge e1;
2311	  edge_iterator ei1;
2312
2313	  if (single_succ_p (bb))
2314	    {
2315	      FOR_EACH_EDGE (e1, ei1, bb->preds)
2316		if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
2317		    && !predicted_by_p (e1->src, PRED_CONST_RETURN)
2318		    && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
2319		  predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2320	    }
2321	  else
2322	    if (!predicted_by_p (e->src, PRED_NULL_RETURN)
2323		&& !predicted_by_p (e->src, PRED_CONST_RETURN)
2324		&& !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
2325	      predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
2326	}
2327
      /* Look for a block we are guarding (i.e. we dominate it,
	 but it doesn't postdominate us).  */
2330      if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
2331	  && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
2332	  && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
2333	{
2334	  gimple_stmt_iterator bi;
2335
2336	  /* The call heuristic claims that a guarded function call
2337	     is improbable.  This is because such calls are often used
2338	     to signal exceptional situations such as printing error
2339	     messages.  */
2340	  for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
2341	       gsi_next (&bi))
2342	    {
2343	      gimple stmt = gsi_stmt (bi);
2344	      if (is_gimple_call (stmt)
		  /* Constant and pure calls are hardly ever used to signal
		     something exceptional.  */
2347		  && gimple_has_side_effects (stmt))
2348		{
2349		  predict_edge_def (e, PRED_CALL, NOT_TAKEN);
2350		  break;
2351		}
2352	    }
2353	}
2354    }
2355  tree_predict_by_opcode (bb);
2356}
2357
2358/* Predict branch probabilities and estimate profile of the tree CFG.
2359   This function can be called from the loop optimizers to recompute
2360   the profile information.  */
2361
2362void
2363tree_estimate_probability (void)
2364{
2365  basic_block bb;
2366
2367  add_noreturn_fake_exit_edges ();
2368  connect_infinite_loops_to_exit ();
2369  /* We use loop_niter_by_eval, which requires that the loops have
2370     preheaders.  */
2371  create_preheaders (CP_SIMPLE_PREHEADERS);
2372  calculate_dominance_info (CDI_POST_DOMINATORS);
2373
2374  bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
2375  tree_bb_level_predictions ();
2376  record_loop_exits ();
2377
2378  if (number_of_loops (cfun) > 1)
2379    predict_loops ();
2380
2381  FOR_EACH_BB_FN (bb, cfun)
2382    tree_estimate_probability_bb (bb);
2383
2384  FOR_EACH_BB_FN (bb, cfun)
2385    combine_predictions_for_bb (bb);
2386
2387#ifdef ENABLE_CHECKING
2388  bb_predictions->traverse<void *, assert_is_empty> (NULL);
2389#endif
2390  delete bb_predictions;
2391  bb_predictions = NULL;
2392
2393  estimate_bb_frequencies (false);
2394  free_dominance_info (CDI_POST_DOMINATORS);
2395  remove_fake_exit_edges ();
2396}
2397
/* Predict edges leading into CUR whose sources are not postdominated by
   BB by PRED, and recurse to all postdominators.  */
2400
2401static void
2402predict_paths_for_bb (basic_block cur, basic_block bb,
2403		      enum br_predictor pred,
2404		      enum prediction taken,
2405		      bitmap visited)
2406{
2407  edge e;
2408  edge_iterator ei;
2409  basic_block son;
2410
  /* We are looking for all edges forming an edge cut induced by the
     set of all blocks postdominated by BB.  */
2413  FOR_EACH_EDGE (e, ei, cur->preds)
2414    if (e->src->index >= NUM_FIXED_BLOCKS
2415	&& !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
2416    {
2417      edge e2;
2418      edge_iterator ei2;
2419      bool found = false;
2420
2421      /* Ignore fake edges and eh, we predict them as not taken anyway.  */
2422      if (e->flags & (EDGE_EH | EDGE_FAKE))
2423	continue;
2424      gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
2425
2426      /* See if there is an edge from e->src that is not abnormal
2427	 and does not lead to BB.  */
2428      FOR_EACH_EDGE (e2, ei2, e->src->succs)
2429	if (e2 != e
2430	    && !(e2->flags & (EDGE_EH | EDGE_FAKE))
2431	    && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
2432	  {
2433	    found = true;
2434	    break;
2435	  }
2436
      /* If there is a non-abnormal path leaving e->src, predict the edge
	 using the predictor.  Otherwise we need to look for paths
	 leading to e->src.

	 The second case may lead to an infinite loop when we are predicting
	 regions that are only reachable by abnormal edges.  We simply
	 prevent visiting a given BB twice.  */
2444      if (found)
2445        predict_edge_def (e, pred, taken);
2446      else if (bitmap_set_bit (visited, e->src->index))
2447	predict_paths_for_bb (e->src, e->src, pred, taken, visited);
2448    }
2449  for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
2450       son;
2451       son = next_dom_son (CDI_POST_DOMINATORS, son))
2452    predict_paths_for_bb (son, bb, pred, taken, visited);
2453}
2454
/* Set branch probabilities of all paths leading to BB according to
   PRED and the prediction TAKEN.  */
2457
2458static void
2459predict_paths_leading_to (basic_block bb, enum br_predictor pred,
2460			  enum prediction taken)
2461{
2462  bitmap visited = BITMAP_ALLOC (NULL);
2463  predict_paths_for_bb (bb, bb, pred, taken, visited);
2464  BITMAP_FREE (visited);
2465}
2466
2467/* Like predict_paths_leading_to but take edge instead of basic block.  */
2468
2469static void
2470predict_paths_leading_to_edge (edge e, enum br_predictor pred,
2471			       enum prediction taken)
2472{
2473  bool has_nonloop_edge = false;
2474  edge_iterator ei;
2475  edge e2;
2476
2477  basic_block bb = e->src;
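  /* If every successor edge of E's source other than E itself either loops
     back or leads to a block postdominating the source, predicting E alone
     would be meaningless; predict all paths leading to the source instead.  */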
2478  FOR_EACH_EDGE (e2, ei, bb->succs)
2479    if (e2->dest != e->src && e2->dest != e->dest
	&& !(e2->flags & (EDGE_EH | EDGE_FAKE))
2481	&& !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
2482      {
2483	has_nonloop_edge = true;
2484	break;
2485      }
2486  if (!has_nonloop_edge)
2487    {
2488      bitmap visited = BITMAP_ALLOC (NULL);
2489      predict_paths_for_bb (bb, bb, pred, taken, visited);
2490      BITMAP_FREE (visited);
2491    }
2492  else
2493    predict_edge_def (e, pred, taken);
2494}
2495
2496/* This is used to carry information about basic blocks.  It is
2497   attached to the AUX field of the standard CFG block.  */
2498
2499typedef struct block_info_def
2500{
2501  /* Estimated frequency of execution of basic_block.  */
2502  sreal frequency;
2503
2504  /* To keep queue of basic blocks to process.  */
2505  basic_block next;
2506
2507  /* Number of predecessors we need to visit first.  */
2508  int npredecessors;
2509} *block_info;
2510
2511/* Similar information for edges.  */
2512typedef struct edge_info_def
2513{
  /* In case the edge is a loopback edge, the probability that the edge will
     be reached provided that the header is.  The estimated number of
     iterations of the loop can then be computed as 1 / (1 - back_edge_prob).  */
2517  sreal back_edge_prob;
2518  /* True if the edge is a loopback edge in the natural loop.  */
2519  unsigned int back_edge:1;
2520} *edge_info;
2521
2522#define BLOCK_INFO(B)	((block_info) (B)->aux)
2523#define EDGE_INFO(E)	((edge_info) (E)->aux)
2524
2525/* Helper function for estimate_bb_frequencies.
2526   Propagate the frequencies in blocks marked in
2527   TOVISIT, starting in HEAD.  */
2528
2529static void
2530propagate_freq (basic_block head, bitmap tovisit)
2531{
2532  basic_block bb;
2533  basic_block last;
2534  unsigned i;
2535  edge e;
2536  basic_block nextbb;
2537  bitmap_iterator bi;
2538
  /* For each basic block we need to visit, count the number of its
     predecessors that we need to visit first.  */
2541  EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
2542    {
2543      edge_iterator ei;
2544      int count = 0;
2545
2546      bb = BASIC_BLOCK_FOR_FN (cfun, i);
2547
2548      FOR_EACH_EDGE (e, ei, bb->preds)
2549	{
2550	  bool visit = bitmap_bit_p (tovisit, e->src->index);
2551
2552	  if (visit && !(e->flags & EDGE_DFS_BACK))
2553	    count++;
2554	  else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
2555	    fprintf (dump_file,
2556		     "Irreducible region hit, ignoring edge to %i->%i\n",
2557		     e->src->index, bb->index);
2558	}
2559      BLOCK_INFO (bb)->npredecessors = count;
      /* When the function never returns, we will never process the exit block.  */
2561      if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
2562	bb->count = bb->frequency = 0;
2563    }
2564
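  /* The head of the region is where the propagation starts; its relative
     frequency is 1.  */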
2565  memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
2566  last = head;
2567  for (bb = head; bb; bb = nextbb)
2568    {
2569      edge_iterator ei;
2570      sreal cyclic_probability, frequency;
2571
2572      memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
2573      memcpy (&frequency, &real_zero, sizeof (real_zero));
2574
2575      nextbb = BLOCK_INFO (bb)->next;
2576      BLOCK_INFO (bb)->next = NULL;
2577
2578      /* Compute frequency of basic block.  */
2579      if (bb != head)
2580	{
2581#ifdef ENABLE_CHECKING
2582	  FOR_EACH_EDGE (e, ei, bb->preds)
2583	    gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
2584			|| (e->flags & EDGE_DFS_BACK));
2585#endif
2586
2587	  FOR_EACH_EDGE (e, ei, bb->preds)
2588	    if (EDGE_INFO (e)->back_edge)
2589	      {
2590		sreal_add (&cyclic_probability, &cyclic_probability,
2591			   &EDGE_INFO (e)->back_edge_prob);
2592	      }
2593	    else if (!(e->flags & EDGE_DFS_BACK))
2594	      {
2595		sreal tmp;
2596
2597		/*  frequency += (e->probability
2598				  * BLOCK_INFO (e->src)->frequency /
2599				  REG_BR_PROB_BASE);  */
2600
2601		sreal_init (&tmp, e->probability, 0);
2602		sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
2603		sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
2604		sreal_add (&frequency, &frequency, &tmp);
2605	      }
2606
2607	  if (sreal_compare (&cyclic_probability, &real_zero) == 0)
2608	    {
2609	      memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
2610		      sizeof (frequency));
2611	    }
2612	  else
2613	    {
2614	      if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
2615		{
2616		  memcpy (&cyclic_probability, &real_almost_one,
2617			  sizeof (real_almost_one));
2618		}
2619
2620	      /* BLOCK_INFO (bb)->frequency = frequency
2621					      / (1 - cyclic_probability) */
2622
2623	      sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
2624	      sreal_div (&BLOCK_INFO (bb)->frequency,
2625			 &frequency, &cyclic_probability);
2626	    }
2627	}
2628
2629      bitmap_clear_bit (tovisit, bb->index);
2630
2631      e = find_edge (bb, head);
2632      if (e)
2633	{
2634	  sreal tmp;
2635
2636	  /* EDGE_INFO (e)->back_edge_prob
2637	     = ((e->probability * BLOCK_INFO (bb)->frequency)
2638	     / REG_BR_PROB_BASE); */
2639
2640	  sreal_init (&tmp, e->probability, 0);
2641	  sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
2642	  sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2643		     &tmp, &real_inv_br_prob_base);
2644	}
2645
2646      /* Propagate to successor blocks.  */
2647      FOR_EACH_EDGE (e, ei, bb->succs)
2648	if (!(e->flags & EDGE_DFS_BACK)
2649	    && BLOCK_INFO (e->dest)->npredecessors)
2650	  {
2651	    BLOCK_INFO (e->dest)->npredecessors--;
2652	    if (!BLOCK_INFO (e->dest)->npredecessors)
2653	      {
2654		if (!nextbb)
2655		  nextbb = e->dest;
2656		else
2657		  BLOCK_INFO (last)->next = e->dest;
2658
2659		last = e->dest;
2660	      }
2661	  }
2662    }
2663}
2664
2665/* Estimate frequencies in loops at same nest level.  */
2666
2667static void
2668estimate_loops_at_level (struct loop *first_loop)
2669{
2670  struct loop *loop;
2671
2672  for (loop = first_loop; loop; loop = loop->next)
2673    {
2674      edge e;
2675      basic_block *bbs;
2676      unsigned i;
2677      bitmap tovisit = BITMAP_ALLOC (NULL);
2678
2679      estimate_loops_at_level (loop->inner);
2680
2681      /* Find current loop back edge and mark it.  */
2682      e = loop_latch_edge (loop);
2683      EDGE_INFO (e)->back_edge = 1;
2684
2685      bbs = get_loop_body (loop);
2686      for (i = 0; i < loop->num_nodes; i++)
2687	bitmap_set_bit (tovisit, bbs[i]->index);
2688      free (bbs);
2689      propagate_freq (loop->header, tovisit);
2690      BITMAP_FREE (tovisit);
2691    }
2692}
2693
2694/* Propagates frequencies through structure of loops.  */
2695
2696static void
2697estimate_loops (void)
2698{
2699  bitmap tovisit = BITMAP_ALLOC (NULL);
2700  basic_block bb;
2701
2702  /* Start by estimating the frequencies in the loops.  */
2703  if (number_of_loops (cfun) > 1)
2704    estimate_loops_at_level (current_loops->tree_root->inner);
2705
2706  /* Now propagate the frequencies through all the blocks.  */
2707  FOR_ALL_BB_FN (bb, cfun)
2708    {
2709      bitmap_set_bit (tovisit, bb->index);
2710    }
2711  propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
2712  BITMAP_FREE (tovisit);
2713}
2714
2715/* Drop the profile for NODE to guessed, and update its frequency based on
2716   whether it is expected to be hot given the CALL_COUNT.  */
2717
2718static void
2719drop_profile (struct cgraph_node *node, gcov_type call_count)
2720{
2721  struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2722  /* In the case where this was called by another function with a
2723     dropped profile, call_count will be 0. Since there are no
2724     non-zero call counts to this function, we don't know for sure
2725     whether it is hot, and therefore it will be marked normal below.  */
2726  bool hot = maybe_hot_count_p (NULL, call_count);
2727
2728  if (dump_file)
2729    fprintf (dump_file,
2730             "Dropping 0 profile for %s/%i. %s based on calls.\n",
2731             node->name (), node->order,
2732             hot ? "Function is hot" : "Function is normal");
2733  /* We only expect to miss profiles for functions that are reached
2734     via non-zero call edges in cases where the function may have
2735     been linked from another module or library (COMDATs and extern
2736     templates). See the comments below for handle_missing_profiles.
2737     Also, only warn in cases where the missing counts exceed the
2738     number of training runs. In certain cases with an execv followed
2739     by a no-return call the profile for the no-return call is not
2740     dumped and there can be a mismatch.  */
2741  if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
2742      && call_count > profile_info->runs)
2743    {
2744      if (flag_profile_correction)
2745        {
2746          if (dump_file)
2747            fprintf (dump_file,
2748                     "Missing counts for called function %s/%i\n",
2749                     node->name (), node->order);
2750        }
2751      else
2752        warning (0, "Missing counts for called function %s/%i",
2753                 node->name (), node->order);
2754    }
2755
2756  profile_status_for_fn (fn)
2757      = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
2758  node->frequency
2759      = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
2760}
2761
2762/* In the case of COMDAT routines, multiple object files will contain the same
2763   function and the linker will select one for the binary. In that case
   all the other copies in the profile-instrumented binary will be missing
2765   profile counts. Look for cases where this happened, due to non-zero
2766   call counts going to 0-count functions, and drop the profile to guessed
2767   so that we can use the estimated probabilities and avoid optimizing only
2768   for size.
2769
2770   The other case where the profile may be missing is when the routine
2771   is not going to be emitted to the object file, e.g. for "extern template"
2772   class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
2773   all other cases of non-zero calls to 0-count functions.  */
2774
2775void
2776handle_missing_profiles (void)
2777{
2778  struct cgraph_node *node;
2779  int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
2780  vec<struct cgraph_node *> worklist;
2781  worklist.create (64);
2782
  /* See if a 0-count function has non-0 count callers.  In this case we
     lost some profile.  Drop its function profile to PROFILE_GUESSED.  */
2785  FOR_EACH_DEFINED_FUNCTION (node)
2786    {
2787      struct cgraph_edge *e;
2788      gcov_type call_count = 0;
2789      gcov_type max_tp_first_run = 0;
2790      struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
2791
2792      if (node->count)
2793        continue;
2794      for (e = node->callers; e; e = e->next_caller)
2795      {
2796        call_count += e->count;
2797
2798	if (e->caller->tp_first_run > max_tp_first_run)
2799	  max_tp_first_run = e->caller->tp_first_run;
2800      }
2801
      /* If the time profile is missing, assign the maximum that comes from
	 the caller functions.  */
2804      if (!node->tp_first_run && max_tp_first_run)
2805	node->tp_first_run = max_tp_first_run + 1;
2806
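      /* Only drop the profile when the calls into this function are frequent
	 enough relative to the number of training runs that a zero count is
	 unlikely to simply mean the function is cold.  */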
2807      if (call_count
2808          && fn && fn->cfg
2809          && (call_count * unlikely_count_fraction >= profile_info->runs))
2810        {
2811          drop_profile (node, call_count);
2812          worklist.safe_push (node);
2813        }
2814    }
2815
2816  /* Propagate the profile dropping to other 0-count COMDATs that are
2817     potentially called by COMDATs we already dropped the profile on.  */
2818  while (worklist.length () > 0)
2819    {
2820      struct cgraph_edge *e;
2821
2822      node = worklist.pop ();
      for (e = node->callees; e; e = e->next_callee)
2824        {
2825          struct cgraph_node *callee = e->callee;
2826          struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
2827
2828          if (callee->count > 0)
2829            continue;
2830          if (DECL_COMDAT (callee->decl) && fn && fn->cfg
2831              && profile_status_for_fn (fn) == PROFILE_READ)
2832            {
2833              drop_profile (node, 0);
2834              worklist.safe_push (callee);
2835            }
2836        }
2837    }
2838  worklist.release ();
2839}
2840
2841/* Convert counts measured by profile driven feedback to frequencies.
2842   Return nonzero iff there was any nonzero execution count.  */
2843
2844int
2845counts_to_freqs (void)
2846{
2847  gcov_type count_max, true_count_max = 0;
2848  basic_block bb;
2849
2850  /* Don't overwrite the estimated frequencies when the profile for
     the function is missing.  We may drop this function to PROFILE_GUESSED
2852     later in drop_profile ().  */
2853  if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
2854    return 0;
2855
2856  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2857    true_count_max = MAX (bb->count, true_count_max);
2858
2859  count_max = MAX (true_count_max, 1);
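  /* Scale so that the most frequently executed block gets frequency
     BB_FREQ_MAX, rounding to the nearest integer.  */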
2860  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2861    bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
2862
2863  return true_count_max;
2864}
2865
/* Return true if the function is likely to be expensive, so there is no point
   in optimizing performance of the prologue, epilogue or doing inlining at
   the expense of code size growth.  THRESHOLD is the limit on the number of
   instructions the function can execute on average to still be considered
   not expensive.  */
2870
2871bool
2872expensive_function_p (int threshold)
2873{
2874  unsigned int sum = 0;
2875  basic_block bb;
2876  unsigned int limit;
2877
  /* We cannot compute this accurately for large thresholds due to the scaled
     frequencies.  */
2880  gcc_assert (threshold <= BB_FREQ_MAX);
2881
  /* Frequencies are out of range.  This either means that the function
     contains an internal loop executing more than BB_FREQ_MAX times or that
     profile feedback is available and the function has not been executed at
     all.  */
2885  if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
2886    return true;
2887
2888  /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
2889  limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
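  /* Sum the frequency of every active insn; the function is considered
     expensive once this estimate exceeds THRESHOLD times the entry block
     frequency.  */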
2890  FOR_EACH_BB_FN (bb, cfun)
2891    {
2892      rtx_insn *insn;
2893
2894      FOR_BB_INSNS (bb, insn)
2895	if (active_insn_p (insn))
2896	  {
2897	    sum += bb->frequency;
2898	    if (sum > limit)
2899	      return true;
	  }
2901    }
2902
2903  return false;
2904}
2905
2906/* Estimate and propagate basic block frequencies using the given branch
2907   probabilities.  If FORCE is true, the frequencies are used to estimate
2908   the counts even when there are already non-zero profile counts.  */
2909
2910void
2911estimate_bb_frequencies (bool force)
2912{
2913  basic_block bb;
2914  sreal freq_max;
2915
2916  if (force || profile_status_for_fn (cfun) != PROFILE_READ || !counts_to_freqs ())
2917    {
2918      static int real_values_initialized = 0;
2919
2920      if (!real_values_initialized)
2921        {
2922	  real_values_initialized = 1;
2923	  sreal_init (&real_zero, 0, 0);
2924	  sreal_init (&real_one, 1, 0);
2925	  sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
2926	  sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
2927	  sreal_init (&real_one_half, 1, -1);
2928	  sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
2929	  sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
2930	}
2931
2932      mark_dfs_back_edges ();
2933
2934      single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
2935	 REG_BR_PROB_BASE;
2936
2937      /* Set up block info for each basic block.  */
2938      alloc_aux_for_blocks (sizeof (struct block_info_def));
2939      alloc_aux_for_edges (sizeof (struct edge_info_def));
2940      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2941	{
2942	  edge e;
2943	  edge_iterator ei;
2944
2945	  FOR_EACH_EDGE (e, ei, bb->succs)
2946	    {
2947	      sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
2948	      sreal_mul (&EDGE_INFO (e)->back_edge_prob,
2949			 &EDGE_INFO (e)->back_edge_prob,
2950			 &real_inv_br_prob_base);
2951	    }
2952	}
2953
2954      /* First compute frequencies locally for each loop from innermost
2955         to outermost to examine frequencies for back edges.  */
2956      estimate_loops ();
2957
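      /* Finally, scale the computed sreal frequencies to the integer range
	 0 .. BB_FREQ_MAX, mapping the most frequent block to BB_FREQ_MAX and
	 rounding to the nearest integer.  */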
2958      memcpy (&freq_max, &real_zero, sizeof (real_zero));
2959      FOR_EACH_BB_FN (bb, cfun)
2960	if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
2961	  memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));
2962
2963      sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
2964      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
2965	{
2966	  sreal tmp;
2967
2968	  sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
2969	  sreal_add (&tmp, &tmp, &real_one_half);
2970	  bb->frequency = sreal_to_int (&tmp);
2971	}
2972
2973      free_aux_for_blocks ();
2974      free_aux_for_edges ();
2975    }
2976  compute_function_frequency ();
2977}
2978
2979/* Decide whether function is hot, cold or unlikely executed.  */
2980void
2981compute_function_frequency (void)
2982{
2983  basic_block bb;
2984  struct cgraph_node *node = cgraph_node::get (current_function_decl);
2985
2986  if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2987      || MAIN_NAME_P (DECL_NAME (current_function_decl)))
2988    node->only_called_at_startup = true;
2989  if (DECL_STATIC_DESTRUCTOR (current_function_decl))
2990    node->only_called_at_exit = true;
2991
2992  if (profile_status_for_fn (cfun) != PROFILE_READ)
2993    {
2994      int flags = flags_from_decl_or_type (current_function_decl);
2995      if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
2996	  != NULL)
2997        node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
2998      else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
2999	       != NULL)
3000        node->frequency = NODE_FREQUENCY_HOT;
3001      else if (flags & ECF_NORETURN)
3002        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3003      else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
3004        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3005      else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
3006	       || DECL_STATIC_DESTRUCTOR (current_function_decl))
3007        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
3008      return;
3009    }
3010
  /* Only try to drop the function into unlikely executed the first time.
     After inlining, roundoff errors may confuse us.
     The ipa-profile pass will drop functions only called from unlikely
     functions to unlikely, and that is most of what we care about.  */
3015  if (!cfun->after_inlining)
3016    node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
3017  FOR_EACH_BB_FN (bb, cfun)
3018    {
3019      if (maybe_hot_bb_p (cfun, bb))
3020	{
3021	  node->frequency = NODE_FREQUENCY_HOT;
3022	  return;
3023	}
3024      if (!probably_never_executed_bb_p (cfun, bb))
3025	node->frequency = NODE_FREQUENCY_NORMAL;
3026    }
3027}
3028
3029/* Build PREDICT_EXPR.  */
3030tree
3031build_predict_expr (enum br_predictor predictor, enum prediction taken)
3032{
3033  tree t = build1 (PREDICT_EXPR, void_type_node,
3034		   build_int_cst (integer_type_node, predictor));
3035  SET_PREDICT_EXPR_OUTCOME (t, taken);
3036  return t;
3037}
3038
3039const char *
3040predictor_name (enum br_predictor predictor)
3041{
3042  return predictor_info[predictor].name;
3043}
3044
3045/* Predict branch probabilities and estimate profile of the tree CFG. */
3046
3047namespace {
3048
3049const pass_data pass_data_profile =
3050{
3051  GIMPLE_PASS, /* type */
3052  "profile_estimate", /* name */
3053  OPTGROUP_NONE, /* optinfo_flags */
3054  TV_BRANCH_PROB, /* tv_id */
3055  PROP_cfg, /* properties_required */
3056  0, /* properties_provided */
3057  0, /* properties_destroyed */
3058  0, /* todo_flags_start */
3059  0, /* todo_flags_finish */
3060};
3061
3062class pass_profile : public gimple_opt_pass
3063{
3064public:
3065  pass_profile (gcc::context *ctxt)
3066    : gimple_opt_pass (pass_data_profile, ctxt)
3067  {}
3068
3069  /* opt_pass methods: */
3070  virtual bool gate (function *) { return flag_guess_branch_prob; }
3071  virtual unsigned int execute (function *);
3072
3073}; // class pass_profile
3074
3075unsigned int
3076pass_profile::execute (function *fun)
3077{
3078  unsigned nb_loops;
3079
3080  loop_optimizer_init (LOOPS_NORMAL);
3081  if (dump_file && (dump_flags & TDF_DETAILS))
3082    flow_loops_dump (dump_file, NULL, 0);
3083
3084  mark_irreducible_loops ();
3085
3086  nb_loops = number_of_loops (fun);
3087  if (nb_loops > 1)
3088    scev_initialize ();
3089
3090  tree_estimate_probability ();
3091
3092  if (nb_loops > 1)
3093    scev_finalize ();
3094
3095  loop_optimizer_finalize ();
3096  if (dump_file && (dump_flags & TDF_DETAILS))
3097    gimple_dump_cfg (dump_file, dump_flags);
  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
3099    profile_status_for_fn (fun) = PROFILE_GUESSED;
3100  return 0;
3101}
3102
3103} // anon namespace
3104
3105gimple_opt_pass *
3106make_pass_profile (gcc::context *ctxt)
3107{
3108  return new pass_profile (ctxt);
3109}
3110
3111namespace {
3112
3113const pass_data pass_data_strip_predict_hints =
3114{
3115  GIMPLE_PASS, /* type */
3116  "*strip_predict_hints", /* name */
3117  OPTGROUP_NONE, /* optinfo_flags */
3118  TV_BRANCH_PROB, /* tv_id */
3119  PROP_cfg, /* properties_required */
3120  0, /* properties_provided */
3121  0, /* properties_destroyed */
3122  0, /* todo_flags_start */
3123  0, /* todo_flags_finish */
3124};
3125
3126class pass_strip_predict_hints : public gimple_opt_pass
3127{
3128public:
3129  pass_strip_predict_hints (gcc::context *ctxt)
3130    : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
3131  {}
3132
3133  /* opt_pass methods: */
3134  opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
3135  virtual unsigned int execute (function *);
3136
3137}; // class pass_strip_predict_hints
3138
3139/* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
3140   we no longer need.  */
3141unsigned int
3142pass_strip_predict_hints::execute (function *fun)
3143{
3144  basic_block bb;
3145  gimple ass_stmt;
3146  tree var;
3147
3148  FOR_EACH_BB_FN (bb, fun)
3149    {
3150      gimple_stmt_iterator bi;
3151      for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
3152	{
3153	  gimple stmt = gsi_stmt (bi);
3154
3155	  if (gimple_code (stmt) == GIMPLE_PREDICT)
3156	    {
3157	      gsi_remove (&bi, true);
3158	      continue;
3159	    }
3160	  else if (is_gimple_call (stmt))
3161	    {
3162	      tree fndecl = gimple_call_fndecl (stmt);
3163
3164	      if ((fndecl
3165		   && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3166		   && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
3167		   && gimple_call_num_args (stmt) == 2)
3168		  || (gimple_call_internal_p (stmt)
3169		      && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
3170		{
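		  /* Fold the __builtin_expect call down to its first argument,
		     or remove it entirely when its result is unused.  */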
3171		  var = gimple_call_lhs (stmt);
3172		  if (var)
3173		    {
3174		      ass_stmt
3175			= gimple_build_assign (var, gimple_call_arg (stmt, 0));
3176		      gsi_replace (&bi, ass_stmt, true);
3177		    }
3178		  else
3179		    {
3180		      gsi_remove (&bi, true);
3181		      continue;
3182		    }
3183		}
3184	    }
3185	  gsi_next (&bi);
3186	}
3187    }
3188  return 0;
3189}
3190
3191} // anon namespace
3192
3193gimple_opt_pass *
3194make_pass_strip_predict_hints (gcc::context *ctxt)
3195{
3196  return new pass_strip_predict_hints (ctxt);
3197}
3198
/* Rebuild function frequencies.  Passes are in general expected to
   maintain the profile by hand; however, in some cases this is not possible:
   for example, when inlining several functions with loops, frequencies might
   run out of scale and thus need to be recomputed.  */
3203
3204void
3205rebuild_frequencies (void)
3206{
3207  timevar_push (TV_REBUILD_FREQUENCIES);
3208
3209  /* When the max bb count in the function is small, there is a higher
3210     chance that there were truncation errors in the integer scaling
3211     of counts by inlining and other optimizations. This could lead
3212     to incorrect classification of code as being cold when it isn't.
3213     In that case, force the estimation of bb counts/frequencies from the
3214     branch probabilities, rather than computing frequencies from counts,
3215     which may also lead to frequencies incorrectly reduced to 0. There
3216     is less precision in the probabilities, so we only do this for small
3217     max counts.  */
3218  gcov_type count_max = 0;
3219  basic_block bb;
3220  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3221    count_max = MAX (bb->count, count_max);
3222
3223  if (profile_status_for_fn (cfun) == PROFILE_GUESSED
3224      || (profile_status_for_fn (cfun) == PROFILE_READ && count_max < REG_BR_PROB_BASE/10))
3225    {
3226      loop_optimizer_init (0);
3227      add_noreturn_fake_exit_edges ();
3228      mark_irreducible_loops ();
3229      connect_infinite_loops_to_exit ();
3230      estimate_bb_frequencies (true);
3231      remove_fake_exit_edges ();
3232      loop_optimizer_finalize ();
3233    }
3234  else if (profile_status_for_fn (cfun) == PROFILE_READ)
3235    counts_to_freqs ();
3236  else
3237    gcc_unreachable ();
3238  timevar_pop (TV_REBUILD_FREQUENCIES);
3239}
3240