/* Branch prediction routines for the GNU compiler.
   Copyright (C) 2000-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* References:

   [1] "Branch Prediction for Free"
       Ball and Larus; PLDI '93.
   [2] "Static Branch Frequency and Program Profile Analysis"
       Wu and Larus; MICRO-27.
   [3] "Corpus-based Static Branch Prediction"
       Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */


#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "calls.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "insn-config.h"
#include "regs.h"
#include "flags.h"
#include "hashtab.h"
#include "hash-set.h"
#include "vec.h"
#include "machmode.h"
#include "input.h"
#include "function.h"
#include "profile.h"
#include "except.h"
#include "diagnostic-core.h"
#include "recog.h"
#include "expr.h"
#include "predict.h"
#include "coverage.h"
#include "sreal.h"
#include "params.h"
#include "target.h"
#include "cfgloop.h"
#include "hash-map.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-pass.h"
#include "tree-scalar-evolution.h"

/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
		   1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
	     real_inv_br_prob_base, real_one_half, real_bb_freq_max;

static void combine_predictions_for_insn (rtx_insn *, basic_block);
static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
static bool can_predict_insn_p (const rtx_insn *);

/* Information we hold about each branch predictor.
   Filled using information from predict.def.  */

struct predictor_info
{
  const char *const name;	/* Name used in the debugging dumps.  */
  const int hitrate;		/* Expected hitrate used by
				   predict_insn_def call.  */
  const int flags;
};

/* Use given predictor without Dempster-Shafer theory if it matches
   using first_match heuristics.  */
#define PRED_FLAG_FIRST_MATCH 1

/* Convert a hitrate given in percent to our representation.  */

#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
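/* For example, with REG_BR_PROB_BASE of 10000, HITRATE (79) evaluates
   to (79 * 10000 + 50) / 100 == 7900, i.e. a 79% hitrate.  */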
107
108#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
109static const struct predictor_info predictor_info[]= {
110#include "predict.def"
111
112  /* Upper bound on predictors.  */
113  {NULL, 0, 0}
114};
115#undef DEF_PREDICTOR
116
117/* Return TRUE if frequency FREQ is considered to be hot.  */
118
119static inline bool
120maybe_hot_frequency_p (struct function *fun, int freq)
121{
122  struct cgraph_node *node = cgraph_node::get (fun->decl);
123  if (!profile_info || !flag_branch_probabilities)
124    {
125      if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
126        return false;
127      if (node->frequency == NODE_FREQUENCY_HOT)
128        return true;
129    }
130  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
131    return true;
132  if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
133      && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
134    return false;
135  if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
136    return false;
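  /* With the default HOT_BB_FREQUENCY_FRACTION of 4, the check below
     requires a block to run at least a quarter as often as the function
     entry to be considered hot.  */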
  if (freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency
	      / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
    return false;
  return true;
}

static gcov_type min_count = -1;

/* Determine the threshold for hot BB counts.  */

gcov_type
get_hot_bb_threshold ()
{
  gcov_working_set_t *ws;
  if (min_count == -1)
    {
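      /* With the default HOT_BB_COUNT_WS_PERMILLE of 999, the threshold
	 is the smallest counter in the working set that covers 99.9% of
	 the profiled execution.  */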
      ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
      gcc_assert (ws);
      min_count = ws->min_counter;
    }
  return min_count;
}

/* Set the threshold for hot BB counts.  */

void
set_hot_bb_threshold (gcov_type min)
{
  min_count = min;
}

/* Return TRUE if profile count COUNT is considered to be hot.  */

static inline bool
maybe_hot_count_p (struct function *fun, gcov_type count)
{
  if (fun && profile_status_for_fn (fun) != PROFILE_READ)
    return true;
  /* Code executed at most once is not hot.  */
  if (profile_info->runs >= count)
    return false;
  return (count >= get_hot_bb_threshold ());
}

/* Return true in case BB can be CPU intensive and should be optimized
   for maximal performance.  */

bool
maybe_hot_bb_p (struct function *fun, const_basic_block bb)
{
  gcc_checking_assert (fun);
  if (profile_status_for_fn (fun) == PROFILE_READ)
    return maybe_hot_count_p (fun, bb->count);
  return maybe_hot_frequency_p (fun, bb->frequency);
}

/* Return true if the call can be hot.  */

bool
cgraph_edge::maybe_hot_p (void)
{
  if (profile_info && flag_branch_probabilities
      && !maybe_hot_count_p (NULL, count))
    return false;
  if (caller->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED
      || (callee
	  && callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
    return false;
  if (caller->frequency > NODE_FREQUENCY_UNLIKELY_EXECUTED
      && (callee
	  && callee->frequency <= NODE_FREQUENCY_EXECUTED_ONCE))
    return false;
  if (optimize_size)
    return false;
  if (caller->frequency == NODE_FREQUENCY_HOT)
    return true;
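  /* CGRAPH_FREQ_BASE is 1000, so the test below rejects call sites
     executed fewer than 1.5 times per invocation of a caller that
     itself runs only once.  */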
  if (caller->frequency == NODE_FREQUENCY_EXECUTED_ONCE
      && frequency < CGRAPH_FREQ_BASE * 3 / 2)
    return false;
  if (flag_guess_branch_prob)
    {
      if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0
	  || frequency <= (CGRAPH_FREQ_BASE
			   / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
        return false;
    }
  return true;
}

/* Return true in case edge E can be CPU intensive and should be optimized
   for maximal performance.  */

bool
maybe_hot_edge_p (edge e)
{
  if (profile_status_for_fn (cfun) == PROFILE_READ)
    return maybe_hot_count_p (cfun, e->count);
  return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
}



/* Return true if profile COUNT and FREQUENCY, or the static node
   frequency of function FUN, reflect that it is never executed.  */

static bool
probably_never_executed (struct function *fun,
                         gcov_type count, int frequency)
{
  gcc_checking_assert (fun);
  if (profile_status_for_fn (cfun) == PROFILE_READ)
    {
      int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
      if (count * unlikely_count_fraction >= profile_info->runs)
	return false;
      if (!frequency)
	return true;
      if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency)
	return false;
      if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
	{
          gcov_type computed_count;
          /* Check for possibility of overflow, in which case entry bb count
             is large enough to do the division first without losing much
             precision.  */
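          /* REG_BR_PROB_BASE is 10000, so the multiply-first branch below
             is taken only while the entry count stays below 10^8.  */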
	  if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
	      < REG_BR_PROB_BASE * REG_BR_PROB_BASE)
            {
              gcov_type scaled_count
		= frequency * ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
		  * unlikely_count_fraction;
	      computed_count = RDIV (scaled_count,
				     ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency);
            }
          else
            {
	      computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count,
				     ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency);
              computed_count *= frequency * unlikely_count_fraction;
            }
          if (computed_count >= profile_info->runs)
            return false;
	}
      return true;
    }
  if ((!profile_info || !flag_branch_probabilities)
      && (cgraph_node::get (fun->decl)->frequency
	  == NODE_FREQUENCY_UNLIKELY_EXECUTED))
    return true;
  return false;
}


/* Return true in case BB is probably never executed.  */

bool
probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
{
  return probably_never_executed (fun, bb->count, bb->frequency);
}


/* Return true in case edge E is probably never executed.  */

bool
probably_never_executed_edge_p (struct function *fun, edge e)
{
  return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
}

/* Return true if function should be optimized for size.  */

bool
cgraph_node::optimize_for_size_p (void)
{
  if (optimize_size)
    return true;
  if (frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
    return true;
  else
    return false;
}

/* Return true when current function should always be optimized for size.  */

bool
optimize_function_for_size_p (struct function *fun)
{
  if (optimize_size)
    return true;
  if (!fun || !fun->decl)
    return false;

  cgraph_node *n = cgraph_node::get (fun->decl);
  return n && n->optimize_for_size_p ();
}

/* Return true when current function should always be optimized for speed.  */

bool
optimize_function_for_speed_p (struct function *fun)
{
  return !optimize_function_for_size_p (fun);
}

/* Return TRUE when BB should be optimized for size.  */

bool
optimize_bb_for_size_p (const_basic_block bb)
{
  return (optimize_function_for_size_p (cfun)
	  || (bb && !maybe_hot_bb_p (cfun, bb)));
}

/* Return TRUE when BB should be optimized for speed.  */

bool
optimize_bb_for_speed_p (const_basic_block bb)
{
  return !optimize_bb_for_size_p (bb);
}

/* Return TRUE when edge E should be optimized for size.  */

bool
optimize_edge_for_size_p (edge e)
{
  return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
}

/* Return TRUE when edge E should be optimized for speed.  */

bool
optimize_edge_for_speed_p (edge e)
{
  return !optimize_edge_for_size_p (e);
}

/* Return TRUE when the current instruction should be optimized for size.  */

bool
optimize_insn_for_size_p (void)
{
  return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
}

/* Return TRUE when the current instruction should be optimized for speed.  */

bool
optimize_insn_for_speed_p (void)
{
  return !optimize_insn_for_size_p ();
}

/* Return TRUE when LOOP should be optimized for size.  */

bool
optimize_loop_for_size_p (struct loop *loop)
{
  return optimize_bb_for_size_p (loop->header);
}

/* Return TRUE when LOOP should be optimized for speed.  */

bool
optimize_loop_for_speed_p (struct loop *loop)
{
  return optimize_bb_for_speed_p (loop->header);
}

/* Return TRUE when LOOP nest should be optimized for speed.  */

bool
optimize_loop_nest_for_speed_p (struct loop *loop)
{
  struct loop *l = loop;
  if (optimize_loop_for_speed_p (loop))
    return true;
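  /* Walk the sub-loop tree in preorder without recursion: descend into
     the first child, then move to the next sibling, and climb back up
     through loop_outer when a subtree is exhausted.  */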
  l = loop->inner;
  while (l && l != loop)
    {
      if (optimize_loop_for_speed_p (l))
        return true;
      if (l->inner)
        l = l->inner;
      else if (l->next)
        l = l->next;
      else
        {
	  while (l != loop && !l->next)
	    l = loop_outer (l);
	  if (l != loop)
	    l = l->next;
	}
    }
  return false;
}

/* Return TRUE when LOOP nest should be optimized for size.  */

bool
optimize_loop_nest_for_size_p (struct loop *loop)
{
  return !optimize_loop_nest_for_speed_p (loop);
}

/* Return true when edge E is likely to be well predicted by the branch
   predictor.  */
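/* With the default PARAM_PREDICTABLE_BRANCH_OUTCOME of 2, an edge
   qualifies when its probability is at most 2% or at least 98%.  */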

bool
predictable_edge_p (edge e)
{
  if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
    return false;
  if ((e->probability
       <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
      || (REG_BR_PROB_BASE - e->probability
          <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
    return true;
  return false;
}


/* Set RTL expansion for BB profile.  */

void
rtl_profile_for_bb (basic_block bb)
{
  crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
}

/* Set RTL expansion for edge profile.  */

void
rtl_profile_for_edge (edge e)
{
  crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
}

/* Set RTL expansion to default mode (i.e. when profile info is not known).  */
void
default_rtl_profile (void)
{
  crtl->maybe_hot_insn_p = true;
}

/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */

bool
rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  rtx note;
  if (!INSN_P (BB_END (bb)))
    return false;
  for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED
	&& INTVAL (XEXP (XEXP (note, 0), 0)) == (int) predictor)
      return true;
  return false;
}

/* Structure representing predictions at the tree level.  */

struct edge_prediction {
    struct edge_prediction *ep_next;
    edge ep_edge;
    enum br_predictor ep_predictor;
    int ep_probability;
};

/* This map contains for a basic block the list of predictions for the
   outgoing edges.  */

static hash_map<const_basic_block, edge_prediction *> *bb_predictions;

/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */

bool
gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  struct edge_prediction *i;
  edge_prediction **preds = bb_predictions->get (bb);

  if (!preds)
    return false;

  for (i = *preds; i; i = i->ep_next)
    if (i->ep_predictor == predictor)
      return true;
  return false;
}

/* Return true when the probability of edge is reliable.

   The profile guessing code is good at predicting branch outcomes
   (i.e. taken/not taken), getting those right slightly over 75% of the
   time.  It is, however, notoriously poor at predicting the probability
   itself.  In general the guessed profile appears a lot flatter (with
   probabilities closer to 50%) than reality, so it is a bad idea to use
   it to drive optimizations such as those disabling dynamic branch
   prediction for well predictable branches.

   There are two exceptions - edges leading to noreturn calls and edges
   predicted by the number-of-iterations heuristics are predicted well.
   This function should be able to distinguish those, but at the moment
   it simply checks for the noreturn heuristic, which is the only one
   giving probabilities over 99% or below 1%.  In the future we might
   want to propagate reliability information across the CFG if we find
   it useful in multiple places.  */
static bool
probability_reliable_p (int prob)
{
  return (profile_status_for_fn (cfun) == PROFILE_READ
	  || (profile_status_for_fn (cfun) == PROFILE_GUESSED
	      && (prob <= HITRATE (1) || prob >= HITRATE (99))));
}

/* Same predicate as above, working on edges.  */
bool
edge_probability_reliable_p (const_edge e)
{
  return probability_reliable_p (e->probability);
}

/* Same predicate as edge_probability_reliable_p, working on notes.  */
bool
br_prob_note_reliable_p (const_rtx note)
{
  gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
  return probability_reliable_p (XINT (note, 0));
}

static void
predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
{
  gcc_assert (any_condjump_p (insn));
  if (!flag_guess_branch_prob)
    return;

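  /* The prediction is stored as a REG_BR_PRED note holding a CONCAT of
     the predictor enum and the probability, both as integer constants;
     combine_predictions_for_insn later folds these notes into a single
     REG_BR_PROB note.  */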
  add_reg_note (insn, REG_BR_PRED,
		gen_rtx_CONCAT (VOIDmode,
				GEN_INT ((int) predictor),
				GEN_INT ((int) probability)));
}

/* Predict insn by given predictor.  */

void
predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
		  enum prediction taken)
{
   int probability = predictor_info[(int) predictor].hitrate;

   if (taken != TAKEN)
     probability = REG_BR_PROB_BASE - probability;

   predict_insn (insn, predictor, probability);
}

/* Predict edge E with given probability if possible.  */

void
rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  rtx_insn *last_insn;
  last_insn = BB_END (e->src);

  /* We can store the branch prediction information only about
     conditional jumps.  */
  if (!any_condjump_p (last_insn))
    return;

  /* We always store probability of branching.  */
  if (e->flags & EDGE_FALLTHRU)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (last_insn, predictor, probability);
}

/* Predict edge E with the given PROBABILITY.  */
void
gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  gcc_assert (profile_status_for_fn (cfun) != PROFILE_GUESSED);
  if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
       && EDGE_COUNT (e->src->succs) > 1)
      && flag_guess_branch_prob && optimize)
    {
      struct edge_prediction *i = XNEW (struct edge_prediction);
      edge_prediction *&preds = bb_predictions->get_or_insert (e->src);

      i->ep_next = preds;
      preds = i;
      i->ep_probability = probability;
      i->ep_predictor = predictor;
      i->ep_edge = e;
    }
}

/* Remove all predictions on the given basic block that are attached
   to edge E.  */
void
remove_predictions_associated_with_edge (edge e)
{
  if (!bb_predictions)
    return;

  edge_prediction **preds = bb_predictions->get (e->src);

  if (preds)
    {
      struct edge_prediction **prediction = preds;
      struct edge_prediction *next;

      while (*prediction)
	{
	  if ((*prediction)->ep_edge == e)
	    {
	      next = (*prediction)->ep_next;
	      free (*prediction);
	      *prediction = next;
	    }
	  else
	    prediction = &((*prediction)->ep_next);
	}
    }
}

/* Clears the list of predictions stored for BB.  */

static void
clear_bb_predictions (basic_block bb)
{
  edge_prediction **preds = bb_predictions->get (bb);
  struct edge_prediction *pred, *next;

  if (!preds)
    return;

  for (pred = *preds; pred; pred = next)
    {
      next = pred->ep_next;
      free (pred);
    }
  *preds = NULL;
}

/* Return true when we can store prediction on insn INSN.
   At the moment we represent predictions only on conditional
   jumps, not computed jumps or other complicated cases.  */
static bool
can_predict_insn_p (const rtx_insn *insn)
{
  return (JUMP_P (insn)
	  && any_condjump_p (insn)
	  && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
}

/* Predict edge E by given predictor if possible.  */

void
predict_edge_def (edge e, enum br_predictor predictor,
		  enum prediction taken)
{
   int probability = predictor_info[(int) predictor].hitrate;

   if (taken != TAKEN)
     probability = REG_BR_PROB_BASE - probability;

   predict_edge (e, predictor, probability);
}

/* Invert all branch predictions or probability notes in the INSN.  This needs
   to be done each time we invert the condition used by the jump.  */

void
invert_br_probabilities (rtx insn)
{
  rtx note;

  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PROB)
      XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
    else if (REG_NOTE_KIND (note) == REG_BR_PRED)
      XEXP (XEXP (note, 0), 1)
	= GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
}

/* Dump information about the branch prediction to the output file.  */

static void
dump_prediction (FILE *file, enum br_predictor predictor, int probability,
		 basic_block bb, int used)
{
  edge e;
  edge_iterator ei;

  if (!file)
    return;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (! (e->flags & EDGE_FALLTHRU))
      break;

  fprintf (file, "  %s heuristics%s: %.1f%%",
	   predictor_info[predictor].name,
	   used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);

  if (bb->count)
    {
      fprintf (file, "  exec %" PRId64, bb->count);
      if (e)
	{
	  fprintf (file, " hit %" PRId64, e->count);
	  fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
	}
    }

  fprintf (file, "\n");
}

/* We cannot predict the probabilities of outgoing edges of bb.  Set them
   evenly and hope for the best.  */
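/* For example, a block with three ordinary (non-EH, non-fake) successors
   gives each of them (REG_BR_PROB_BASE + 1) / 3, i.e. roughly a third.  */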
static void
set_even_probabilities (basic_block bb)
{
  int nedges = 0;
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      nedges++;
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
    else
      e->probability = 0;
}

/* Combine all REG_BR_PRED notes into a single probability and attach a
   REG_BR_PROB note if not already present.  Remove now useless
   REG_BR_PRED notes.  */

static void
combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
{
  rtx prob_note;
  rtx *pnote;
  rtx note;
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;

  if (!can_predict_insn_p (insn))
    {
      set_even_probabilities (bb);
      return;
    }

  prob_note = find_reg_note (insn, REG_BR_PROB, 0);
  pnote = &REG_NOTES (insn);
  if (dump_file)
    fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
	     bb->index);

  /* We implement "first match" heuristics and use the probability guessed
     by the predictor with the smallest index.  */
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED)
      {
	enum br_predictor predictor = ((enum br_predictor)
				       INTVAL (XEXP (XEXP (note, 0), 0)));
	int probability = INTVAL (XEXP (XEXP (note, 0), 1));

	found = true;
	if (best_predictor > predictor)
	  best_probability = probability, best_predictor = predictor;

	d = (combined_probability * probability
	     + (REG_BR_PROB_BASE - combined_probability)
	     * (REG_BR_PROB_BASE - probability));
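
	/* This is the Dempster-Shafer combination: the result is
	   p1*p2 / (p1*p2 + (1-p1)*(1-p2)).  For instance, combining two
	   independent 70% predictions yields 0.49 / (0.49 + 0.09),
	   i.e. roughly 84.5%.  */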

	/* Use FP math to avoid overflows of 32-bit integers.  */
	if (d == 0)
	  /* If one probability is 0% and one 100%, avoid division by zero.  */
	  combined_probability = REG_BR_PROB_BASE / 2;
	else
	  combined_probability = (((double) combined_probability) * probability
				  * REG_BR_PROB_BASE / d + 0.5);
      }

  /* Decide which heuristic to use.  If we didn't match anything, use the
     no_prediction heuristic; if we did match, use either first match or
     Dempster-Shafer theory depending on the flags.  */

  if (predictor_info[best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION,
		     combined_probability, bb, true);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
		       bb, !first_match);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
		       bb, first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);

  while (*pnote)
    {
      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
	{
	  enum br_predictor predictor = ((enum br_predictor)
					 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));

	  dump_prediction (dump_file, predictor, probability, bb,
			   !first_match || best_predictor == predictor);
	  *pnote = XEXP (*pnote, 1);
	}
      else
	pnote = &XEXP (*pnote, 1);
    }

  if (!prob_note)
    {
      add_int_reg_note (insn, REG_BR_PROB, combined_probability);

      /* Save the prediction into the CFG if we are seeing a non-degenerate
	 conditional jump.  */
      if (!single_succ_p (bb))
	{
	  BRANCH_EDGE (bb)->probability = combined_probability;
	  FALLTHRU_EDGE (bb)->probability
	    = REG_BR_PROB_BASE - combined_probability;
	}
    }
  else if (!single_succ_p (bb))
    {
      int prob = XINT (prob_note, 0);

      BRANCH_EDGE (bb)->probability = prob;
      FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
    }
  else
    single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
}

/* Combine predictions into a single probability and store them into the CFG.
   Remove now useless prediction entries.  */

static void
combine_predictions_for_bb (basic_block bb)
{
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;
  struct edge_prediction *pred;
  int nedges = 0;
  edge e, first = NULL, second = NULL;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      {
	nedges++;
	if (first && !second)
	  second = e;
	if (!first)
	  first = e;
      }

  /* When there is no successor or only one choice, prediction is easy.

     We are lazy for now and predict only basic blocks with two outgoing
     edges.  It is possible to predict the generic case too, but we have to
     ignore first match heuristics and do more involved combining.  Implement
     this later.  */
  if (nedges != 2)
    {
      if (!bb->count)
	set_even_probabilities (bb);
      clear_bb_predictions (bb);
      if (dump_file)
	fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
		 nedges, bb->index);
      return;
    }

  if (dump_file)
    fprintf (dump_file, "Predictions for bb %i\n", bb->index);

  edge_prediction **preds = bb_predictions->get (bb);
  if (preds)
    {
      /* We implement "first match" heuristics and use the probability guessed
	 by the predictor with the smallest index.  */
      for (pred = *preds; pred; pred = pred->ep_next)
	{
	  enum br_predictor predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  if (pred->ep_edge != first)
	    probability = REG_BR_PROB_BASE - probability;

	  found = true;
	  /* First match heuristics would be wildly confused if we predicted
	     both directions.  */
	  if (best_predictor > predictor)
	    {
	      struct edge_prediction *pred2;
	      int prob = probability;

	      for (pred2 = (struct edge_prediction *) *preds;
		   pred2; pred2 = pred2->ep_next)
	       if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
	         {
	           int probability2 = pred2->ep_probability;

		   if (pred2->ep_edge != first)
		     probability2 = REG_BR_PROB_BASE - probability2;

		   if ((probability < REG_BR_PROB_BASE / 2) !=
		       (probability2 < REG_BR_PROB_BASE / 2))
		     break;

		   /* If the same predictor later gave a better result, go for it!  */
		   if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
		       || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
		     prob = probability2;
		 }
	      if (!pred2)
	        best_probability = prob, best_predictor = predictor;
	    }

	  d = (combined_probability * probability
	       + (REG_BR_PROB_BASE - combined_probability)
	       * (REG_BR_PROB_BASE - probability));

	  /* Use FP math to avoid overflows of 32-bit integers.  */
	  if (d == 0)
	    /* If one probability is 0% and one 100%, avoid division by zero.  */
	    combined_probability = REG_BR_PROB_BASE / 2;
	  else
	    combined_probability = (((double) combined_probability)
				    * probability
				    * REG_BR_PROB_BASE / d + 0.5);
	}
    }

  /* Decide which heuristic to use.  If we didn't match anything, use the
     no_prediction heuristic; if we did match, use either first match or
     Dempster-Shafer theory depending on the flags.  */

  if (predictor_info[best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
		       !first_match);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
		       first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);

  if (preds)
    {
      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
	{
	  enum br_predictor predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  if (pred->ep_edge != EDGE_SUCC (bb, 0))
	    probability = REG_BR_PROB_BASE - probability;
	  dump_prediction (dump_file, predictor, probability, bb,
			   !first_match || best_predictor == predictor);
	}
    }
  clear_bb_predictions (bb);

  if (!bb->count)
    {
      first->probability = combined_probability;
      second->probability = REG_BR_PROB_BASE - combined_probability;
    }
}

/* Check if T1 and T2 satisfy the IV_COMPARE condition.
   Return the SSA_NAME if the condition is satisfied, NULL otherwise.

   T1 and T2 should be one of the following cases:
     1. T1 is SSA_NAME, T2 is NULL
     2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
     3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */
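/* For instance, given the operands of "i_1 + 2" (T1 = i_1, T2 = 2) the
   function returns i_1, while for "i_1 + 8" it returns NULL because the
   constant is out of range.  */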

static tree
strips_small_constant (tree t1, tree t2)
{
  tree ret = NULL;
  int value = 0;

  if (!t1)
    return NULL;
  else if (TREE_CODE (t1) == SSA_NAME)
    ret = t1;
  else if (tree_fits_shwi_p (t1))
    value = tree_to_shwi (t1);
  else
    return NULL;

  if (!t2)
    return ret;
  else if (tree_fits_shwi_p (t2))
    value = tree_to_shwi (t2);
  else if (TREE_CODE (t2) == SSA_NAME)
    {
      if (ret)
        return NULL;
      else
        ret = t2;
    }

  if (value <= 4 && value >= -4)
    return ret;
  else
    return NULL;
}

/* Return the SSA_NAME in T or T's operands.
   Return NULL if SSA_NAME cannot be found.  */

static tree
get_base_value (tree t)
{
  if (TREE_CODE (t) == SSA_NAME)
    return t;

  if (!BINARY_CLASS_P (t))
    return NULL;

  switch (TREE_OPERAND_LENGTH (t))
    {
    case 1:
      return strips_small_constant (TREE_OPERAND (t, 0), NULL);
    case 2:
      return strips_small_constant (TREE_OPERAND (t, 0),
				    TREE_OPERAND (t, 1));
    default:
      return NULL;
    }
}

/* Check the compare STMT in LOOP.  If it compares an induction
   variable to a loop invariant, return true, and save
   LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
   Otherwise return false and set LOOP_INVARIANT to NULL.  */
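/* For example, for an exit condition "i_1 < n_2" where i_1 is an
   induction variable with step 1 and n_2 is loop invariant, this returns
   true with *LOOP_INVARIANT = n_2, *COMPARE_CODE = LT_EXPR,
   *LOOP_STEP = 1 and *LOOP_IV_BASE set to the base of i_1.  */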

static bool
is_comparison_with_loop_invariant_p (gimple stmt, struct loop *loop,
				     tree *loop_invariant,
				     enum tree_code *compare_code,
				     tree *loop_step,
				     tree *loop_iv_base)
{
  tree op0, op1, bound, base;
  affine_iv iv0, iv1;
  enum tree_code code;
  tree step;

  code = gimple_cond_code (stmt);
  *loop_invariant = NULL;

  switch (code)
    {
    case GT_EXPR:
    case GE_EXPR:
    case NE_EXPR:
    case LT_EXPR:
    case LE_EXPR:
    case EQ_EXPR:
      break;

    default:
      return false;
    }

  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);

  if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
       || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
    return false;
  if (TREE_CODE (iv0.step) != INTEGER_CST
      || TREE_CODE (iv1.step) != INTEGER_CST)
    return false;
  if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
      || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
    return false;

  if (integer_zerop (iv0.step))
    {
      if (code != NE_EXPR && code != EQ_EXPR)
	code = invert_tree_comparison (code, false);
      bound = iv0.base;
      base = iv1.base;
      if (tree_fits_shwi_p (iv1.step))
	step = iv1.step;
      else
	return false;
    }
  else
    {
      bound = iv1.base;
      base = iv0.base;
      if (tree_fits_shwi_p (iv0.step))
	step = iv0.step;
      else
	return false;
    }

  if (TREE_CODE (bound) != INTEGER_CST)
    bound = get_base_value (bound);
  if (!bound)
    return false;
  if (TREE_CODE (base) != INTEGER_CST)
    base = get_base_value (base);
  if (!base)
    return false;

  *loop_invariant = bound;
  *compare_code = code;
  *loop_step = step;
  *loop_iv_base = base;
  return true;
}

/* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */

static bool
expr_coherent_p (tree t1, tree t2)
{
  gimple stmt;
  tree ssa_name_1 = NULL;
  tree ssa_name_2 = NULL;

  gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
  gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);

  if (t1 == t2)
    return true;

  if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
    return true;
  if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
    return false;

  /* Check to see if t1 is expressed/defined with t2.  */
  stmt = SSA_NAME_DEF_STMT (t1);
  gcc_assert (stmt != NULL);
  if (is_gimple_assign (stmt))
    {
      ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
      if (ssa_name_1 && ssa_name_1 == t2)
	return true;
    }

  /* Check to see if t2 is expressed/defined with t1.  */
  stmt = SSA_NAME_DEF_STMT (t2);
  gcc_assert (stmt != NULL);
  if (is_gimple_assign (stmt))
    {
      ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
      if (ssa_name_2 && ssa_name_2 == t1)
	return true;
    }

  /* Compare if t1 and t2's def_stmts are identical.  */
  if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
    return true;
  else
    return false;
}

/* Predict branch probability of BB when BB contains a branch that compares
   an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR.  The
   loop exit is compared using LOOP_BOUND_CODE, with a step of LOOP_BOUND_STEP.

   E.g.
     for (int i = 0; i < bound; i++) {
       if (i < bound - 2)
	 computation_1();
       else
	 computation_2();
     }

  In this loop, we will predict the branch inside the loop to be taken.  */

static void
predict_iv_comparison (struct loop *loop, basic_block bb,
		       tree loop_bound_var,
		       tree loop_iv_base_var,
		       enum tree_code loop_bound_code,
		       int loop_bound_step)
{
  gimple stmt;
  tree compare_var, compare_base;
  enum tree_code compare_code;
  tree compare_step_var;
  edge then_edge;
  edge_iterator ei;

  if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
      || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
      || predicted_by_p (bb, PRED_LOOP_EXIT))
    return;

  stmt = last_stmt (bb);
  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
    return;
  if (!is_comparison_with_loop_invariant_p (stmt, loop, &compare_var,
					    &compare_code,
					    &compare_step_var,
					    &compare_base))
    return;

  /* Find the taken edge.  */
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;

  /* When comparing an IV to a loop invariant, NE is more likely to be
     taken while EQ is more likely to be not-taken.  */
  if (compare_code == NE_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      return;
    }
  else if (compare_code == EQ_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
      return;
    }

  if (!expr_coherent_p (loop_iv_base_var, compare_base))
    return;

  /* If loop bound, base and compare bound are all constants, we can
     calculate the probability directly.  */
  if (tree_fits_shwi_p (loop_bound_var)
      && tree_fits_shwi_p (compare_var)
      && tree_fits_shwi_p (compare_base))
    {
      int probability;
      bool overflow, overall_overflow = false;
      widest_int compare_count, tem;

      /* (loop_bound - base) / compare_step */
      tem = wi::sub (wi::to_widest (loop_bound_var),
		     wi::to_widest (compare_base), SIGNED, &overflow);
      overall_overflow |= overflow;
      widest_int loop_count = wi::div_trunc (tem,
					     wi::to_widest (compare_step_var),
					     SIGNED, &overflow);
      overall_overflow |= overflow;

      if (!wi::neg_p (wi::to_widest (compare_step_var))
          ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
	{
	  /* (loop_bound - compare_bound) / compare_step */
	  tem = wi::sub (wi::to_widest (loop_bound_var),
			 wi::to_widest (compare_var), SIGNED, &overflow);
	  overall_overflow |= overflow;
	  compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
					 SIGNED, &overflow);
	  overall_overflow |= overflow;
	}
      else
        {
	  /* (compare_bound - base) / compare_step */
	  tem = wi::sub (wi::to_widest (compare_var),
			 wi::to_widest (compare_base), SIGNED, &overflow);
	  overall_overflow |= overflow;
          compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
					 SIGNED, &overflow);
	  overall_overflow |= overflow;
	}
      if (compare_code == LE_EXPR || compare_code == GE_EXPR)
	++compare_count;
      if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
	++loop_count;
      if (wi::neg_p (compare_count))
        compare_count = 0;
      if (wi::neg_p (loop_count))
        loop_count = 0;
      if (loop_count == 0)
	probability = 0;
      else if (wi::cmps (compare_count, loop_count) == 1)
	probability = REG_BR_PROB_BASE;
      else
        {
	  tem = compare_count * REG_BR_PROB_BASE;
	  tem = wi::udiv_trunc (tem, loop_count);
	  probability = tem.to_uhwi ();
	}

      if (!overall_overflow)
        predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);

      return;
    }

  if (expr_coherent_p (loop_bound_var, compare_var))
    {
      if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
	  && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
	       && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_code == NE_EXPR)
	{
	  /* If the loop backedge condition is "(i != bound)", we do
	     the comparison based on the step of IV:
	     * step < 0 : backedge condition is like (i > bound)
	     * step > 0 : backedge condition is like (i < bound)  */
	  gcc_assert (loop_bound_step != 0);
	  if (loop_bound_step > 0
	      && (compare_code == LT_EXPR
		  || compare_code == LE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else if (loop_bound_step < 0
		   && (compare_code == GT_EXPR
		       || compare_code == GE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
	}
      else
	/* The branch is predicted not-taken if loop_bound_code is
	   opposite with compare_code.  */
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
  else if (expr_coherent_p (loop_iv_base_var, compare_var))
    {
      /* For cases like:
	   for (i = s; i < h; i++)
	     if (i > s + 2) ....
	 The branch should be predicted taken.  */
      if (loop_bound_step > 0
	  && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_step < 0
	       && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
}

/* Predict extra loop exits that will lead to EXIT_EDGE.  The extra loop
   exits result from short-circuit conditions that generate an if_tmp.
   E.g.:

   if (foo() || global > 10)
     break;

   This will be translated into:

   BB3:
     loop header...
   BB4:
     if foo() goto BB6 else goto BB5
   BB5:
     if global > 10 goto BB6 else goto BB7
   BB6:
     goto BB7
   BB7:
     iftmp = (PHI 0(BB5), 1(BB6))
     if iftmp == 1 goto BB8 else goto BB3
   BB8:
     outside of the loop...

   The edge BB7->BB8 is a loop exit because BB8 is outside of the loop.
   From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
   exits.  This function takes BB7->BB8 as input, finds the extra loop
   exits and predicts them using PRED_LOOP_EXIT.  */

static void
predict_extra_loop_exits (edge exit_edge)
{
  unsigned i;
  bool check_value_one;
  gimple phi_stmt;
  tree cmp_rhs, cmp_lhs;
  gimple cmp_stmt = last_stmt (exit_edge->src);

  if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
    return;
  cmp_rhs = gimple_cond_rhs (cmp_stmt);
  cmp_lhs = gimple_cond_lhs (cmp_stmt);
  if (!TREE_CONSTANT (cmp_rhs)
      || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
    return;
  if (TREE_CODE (cmp_lhs) != SSA_NAME)
    return;

  /* If check_value_one is true, only the phi_args with value '1' will lead
     to the loop exit.  Otherwise, only the phi_args with value '0' will
     lead to the loop exit.  */
  check_value_one = (((integer_onep (cmp_rhs))
		    ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
		    ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
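
  /* E.g. for "if (iftmp == 1)" with the exit on the TRUE edge, the three
     terms above are 1 ^ 1 ^ 1, so check_value_one is true: the phi_args
     equal to '1' lead to the exit.  */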

  phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
  if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI)
    return;

  for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
    {
      edge e1;
      edge_iterator ei;
      tree val = gimple_phi_arg_def (phi_stmt, i);
      edge e = gimple_phi_arg_edge (phi_stmt, i);

      if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
	continue;
      if ((check_value_one ^ integer_onep (val)) == 1)
	continue;
      if (EDGE_COUNT (e->src->succs) != 1)
	{
	  predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
	  continue;
	}

      FOR_EACH_EDGE (e1, ei, e->src->preds)
	predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
    }
}

/* Predict edge probabilities by exploiting loop structure.  */

static void
predict_loops (void)
{
  struct loop *loop;

  /* Predict the exit edges and the number of iterations of each
     natural loop.  */
  FOR_EACH_LOOP (loop, 0)
    {
      basic_block bb, *bbs;
      unsigned j, n_exits;
      vec<edge> exits;
      struct tree_niter_desc niter_desc;
      edge ex;
      struct nb_iter_bound *nb_iter;
      enum tree_code loop_bound_code = ERROR_MARK;
      tree loop_bound_step = NULL;
      tree loop_bound_var = NULL;
      tree loop_iv_base = NULL;
      gimple stmt = NULL;

      exits = get_loop_exit_edges (loop);
      n_exits = exits.length ();
      if (!n_exits)
	{
          exits.release ();
	  continue;
	}

      FOR_EACH_VEC_ELT (exits, j, ex)
	{
	  tree niter = NULL;
	  HOST_WIDE_INT nitercst;
	  int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
	  int probability;
	  enum br_predictor predictor;

	  predict_extra_loop_exits (ex);

	  if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
	    niter = niter_desc.niter;
	  if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
	    niter = loop_niter_by_eval (loop, ex);

	  if (TREE_CODE (niter) == INTEGER_CST)
	    {
	      if (tree_fits_uhwi_p (niter)
		  && max
		  && compare_tree_int (niter, max - 1) == -1)
		nitercst = tree_to_uhwi (niter) + 1;
	      else
		nitercst = max;
	      predictor = PRED_LOOP_ITERATIONS;
	    }
	  /* If we have just one exit and we can derive some information about
	     the number of iterations of the loop from the statements inside
	     the loop, use it to predict this exit.  */
	  else if (n_exits == 1)
	    {
	      nitercst = estimated_stmt_executions_int (loop);
	      if (nitercst < 0)
		continue;
	      if (nitercst > max)
		nitercst = max;

	      predictor = PRED_LOOP_ITERATIONS_GUESSED;
	    }
	  else
	    continue;

	  /* If the prediction for number of iterations is zero, do not
	     predict the exit edges.  */
	  if (nitercst == 0)
	    continue;

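	  /* The exit edge is predicted taken once per NITERCST iterations;
	     e.g. an estimate of 100 iterations yields an exit probability
	     of about 1%.  */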
1575	  probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
1576	  predict_edge (ex, predictor, probability);
1577	}
1578      exits.release ();
1579
1580      /* Find information about loop bound variables.  */
1581      for (nb_iter = loop->bounds; nb_iter;
1582	   nb_iter = nb_iter->next)
1583	if (nb_iter->stmt
1584	    && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1585	  {
1586	    stmt = nb_iter->stmt;
1587	    break;
1588	  }
1589      if (!stmt && last_stmt (loop->header)
1590	  && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1591	stmt = last_stmt (loop->header);
1592      if (stmt)
1593	is_comparison_with_loop_invariant_p (stmt, loop,
1594					     &loop_bound_var,
1595					     &loop_bound_code,
1596					     &loop_bound_step,
1597					     &loop_iv_base);
1598
1599      bbs = get_loop_body (loop);
1600
1601      for (j = 0; j < loop->num_nodes; j++)
1602	{
1603	  int header_found = 0;
1604	  edge e;
1605	  edge_iterator ei;
1606
1607	  bb = bbs[j];
1608
1609	  /* Bypass loop heuristics on continue statement.  These
1610	     statements construct loops via "non-loop" constructs
1611	     in the source language and are better to be handled
1612	     separately.  */
1613	  if (predicted_by_p (bb, PRED_CONTINUE))
1614	    continue;
1615
1616	  /* Loop branch heuristics - predict an edge back to a
1617	     loop's head as taken.  */
1618	  if (bb == loop->latch)
1619	    {
1620	      e = find_edge (loop->latch, loop->header);
1621	      if (e)
1622		{
1623		  header_found = 1;
1624		  predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
1625		}
1626	    }
1627
1628	  /* Loop exit heuristics - predict an edge exiting the loop if the
1629	     conditional has no loop header successors as not taken.  */
1630	  if (!header_found
1631	      /* If we already used more reliable loop exit predictors, do not
1632		 bother with PRED_LOOP_EXIT.  */
1633	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1634	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
1635	    {
1636	      /* For loop with many exits we don't want to predict all exits
1637	         with the pretty large probability, because if all exits are
1638		 considered in row, the loop would be predicted to iterate
1639		 almost never.  The code to divide probability by number of
1640		 exits is very rough.  It should compute the number of exits
1641		 taken in each patch through function (not the overall number
1642		 of exits that might be a lot higher for loops with wide switch
1643		 statements in them) and compute n-th square root.
1644
1645		 We limit the minimal probability by 2% to avoid
1646		 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
1647		 as this was causing regression in perl benchmark containing such
1648		 a wide loop.  */
1649
1650	      int probability = ((REG_BR_PROB_BASE
1651		                  - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
1652				 / n_exits);
1653	      if (probability < HITRATE (2))
1654		probability = HITRATE (2);
1655	      FOR_EACH_EDGE (e, ei, bb->succs)
1656		if (e->dest->index < NUM_FIXED_BLOCKS
1657		    || !flow_bb_inside_loop_p (loop, e->dest))
1658		  predict_edge (e, PRED_LOOP_EXIT, probability);
1659	    }
1660	  if (loop_bound_var)
1661	    predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
1662				   loop_bound_code,
1663				   tree_to_shwi (loop_bound_step));
1664	}
1665
1666      /* Free basic blocks from get_loop_body.  */
1667      free (bbs);
1668    }
1669}
1670
1671/* Attempt to predict probabilities of BB outgoing edges using local
1672   properties.  */
1673static void
1674bb_estimate_probability_locally (basic_block bb)
1675{
1676  rtx_insn *last_insn = BB_END (bb);
1677  rtx cond;
1678
1679  if (! can_predict_insn_p (last_insn))
1680    return;
1681  cond = get_condition (last_insn, NULL, false, false);
1682  if (! cond)
1683    return;
1684
1685  /* Try "pointer heuristic."
1686     A comparison ptr == 0 is predicted as false.
1687     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
1688  if (COMPARISON_P (cond)
1689      && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
1690	  || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
1691    {
1692      if (GET_CODE (cond) == EQ)
1693	predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
1694      else if (GET_CODE (cond) == NE)
1695	predict_insn_def (last_insn, PRED_POINTER, TAKEN);
1696    }
1697  else
1698
1699  /* Try "opcode heuristic."
1700     EQ tests are usually false and NE tests are usually true. Also,
1701     most quantities are positive, so we can make the appropriate guesses
1702     about signed comparisons against zero.  */
1703    switch (GET_CODE (cond))
1704      {
1705      case CONST_INT:
1706	/* Unconditional branch.  */
1707	predict_insn_def (last_insn, PRED_UNCONDITIONAL,
1708			  cond == const0_rtx ? NOT_TAKEN : TAKEN);
1709	break;
1710
1711      case EQ:
1712      case UNEQ:
1713	/* Floating point comparisons appears to behave in a very
1714	   unpredictable way because of special role of = tests in
1715	   FP code.  */
1716	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1717	  ;
1718	/* Comparisons with 0 are often used for booleans and there is
1719	   nothing useful to predict about them.  */
1720	else if (XEXP (cond, 1) == const0_rtx
1721		 || XEXP (cond, 0) == const0_rtx)
1722	  ;
1723	else
1724	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
1725	break;
1726
1727      case NE:
1728      case LTGT:
1729	/* Floating point comparisons appears to behave in a very
1730	   unpredictable way because of special role of = tests in
1731	   FP code.  */
1732	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
1733	  ;
1734	/* Comparisons with 0 are often used for booleans and there is
1735	   nothing useful to predict about them.  */
1736	else if (XEXP (cond, 1) == const0_rtx
1737		 || XEXP (cond, 0) == const0_rtx)
1738	  ;
1739	else
1740	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
1741	break;
1742
1743      case ORDERED:
1744	predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
1745	break;
1746
1747      case UNORDERED:
1748	predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
1749	break;
1750
1751      case LE:
1752      case LT:
1753	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1754	    || XEXP (cond, 1) == constm1_rtx)
1755	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
1756	break;
1757
1758      case GE:
1759      case GT:
1760	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
1761	    || XEXP (cond, 1) == constm1_rtx)
1762	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
1763	break;
1764
1765      default:
1766	break;
1767      }
1768}
1769
1770/* Set edge->probability for each successor edge of BB.  */
1771void
1772guess_outgoing_edge_probabilities (basic_block bb)
1773{
1774  bb_estimate_probability_locally (bb);
1775  combine_predictions_for_insn (BB_END (bb), bb);
1776}
1777
1778static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);
1779
1780/* Helper function for expr_expected_value.  */
1781
1782static tree
1783expr_expected_value_1 (tree type, tree op0, enum tree_code code,
1784		       tree op1, bitmap visited, enum br_predictor *predictor)
1785{
1786  gimple def;
1787
1788  if (predictor)
1789    *predictor = PRED_UNCONDITIONAL;
1790
1791  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1792    {
1793      if (TREE_CONSTANT (op0))
1794	return op0;
1795
1796      if (code != SSA_NAME)
1797	return NULL_TREE;
1798
1799      def = SSA_NAME_DEF_STMT (op0);
1800
1801      /* If we were already here, break the infinite cycle.  */
1802      if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
1803	return NULL;
1804
1805      if (gimple_code (def) == GIMPLE_PHI)
1806	{
	  /* All the arguments of the PHI node must have the same expected
	     constant value.  */
	  int i, n = gimple_phi_num_args (def);
	  tree val = NULL, new_val;

	  for (i = 0; i < n; i++)
	    {
	      tree arg = PHI_ARG_DEF (def, i);
	      enum br_predictor predictor2;

	      /* If this PHI has itself as an argument, we cannot
		 determine the expected value of this argument.  However,
		 if we can find an expected constant value for the other
		 PHI args then we can still be sure that this is
		 likely a constant.  So be optimistic and just
		 continue with the next argument.  */
	      if (arg == PHI_RESULT (def))
		continue;

	      new_val = expr_expected_value (arg, visited, &predictor2);

	      /* It is difficult to combine value predictors.  Simply assume
		 that the later (higher-numbered) predictor is weaker and
		 take its prediction.  */
	      if (predictor && *predictor < predictor2)
		*predictor = predictor2;
	      if (!new_val)
		return NULL;
	      if (!val)
		val = new_val;
	      else if (!operand_equal_p (val, new_val, false))
		return NULL;
	    }
	  return val;
	}
      if (is_gimple_assign (def))
	{
	  if (gimple_assign_lhs (def) != op0)
	    return NULL;

	  return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
					gimple_assign_rhs1 (def),
					gimple_assign_rhs_code (def),
					gimple_assign_rhs2 (def),
					visited, predictor);
	}

      if (is_gimple_call (def))
	{
	  tree decl = gimple_call_fndecl (def);
	  if (!decl)
	    {
	      if (gimple_call_internal_p (def)
		  && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
		{
		  gcc_assert (gimple_call_num_args (def) == 3);
		  tree val = gimple_call_arg (def, 0);
		  if (TREE_CONSTANT (val))
		    return val;
		  if (predictor)
		    {
		      tree val2 = gimple_call_arg (def, 2);
		      gcc_assert (TREE_CODE (val2) == INTEGER_CST
				  && tree_fits_uhwi_p (val2)
				  && tree_to_uhwi (val2) < END_PREDICTORS);
		      *predictor = (enum br_predictor) tree_to_uhwi (val2);
		    }
		  return gimple_call_arg (def, 1);
		}
	      return NULL;
	    }
	  if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
	    switch (DECL_FUNCTION_CODE (decl))
	      {
	      case BUILT_IN_EXPECT:
		{
		  tree val;
		  if (gimple_call_num_args (def) != 2)
		    return NULL;
		  val = gimple_call_arg (def, 0);
		  if (TREE_CONSTANT (val))
		    return val;
		  if (predictor)
		    *predictor = PRED_BUILTIN_EXPECT;
		  return gimple_call_arg (def, 1);
		}

	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
		/* Assume that any given atomic operation has low contention,
		   and thus the compare-and-swap operation succeeds.  */
		if (predictor)
		  *predictor = PRED_COMPARE_AND_SWAP;
		return boolean_true_node;
	      default:
		break;
	      }
	}

      return NULL;
    }

  if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
    {
      tree res;
      enum br_predictor predictor2;
      op0 = expr_expected_value (op0, visited, predictor);
      if (!op0)
	return NULL;
      op1 = expr_expected_value (op1, visited, &predictor2);
      if (predictor && *predictor < predictor2)
	*predictor = predictor2;
      if (!op1)
	return NULL;
      res = fold_build2 (code, type, op0, op1);
      if (TREE_CONSTANT (res))
	return res;
      return NULL;
    }
  if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
    {
      tree res;
      op0 = expr_expected_value (op0, visited, predictor);
      if (!op0)
	return NULL;
      res = fold_build1 (code, type, op0);
      if (TREE_CONSTANT (res))
	return res;
      return NULL;
    }
  return NULL;
}
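
/* For example, for

       if (__builtin_expect (ptr != NULL, 1)) ...

   the walk above follows the SSA definition of the condition operand to
   the BUILT_IN_EXPECT (or IFN_BUILTIN_EXPECT) call, returns its second
   argument (1) as the expected value, and reports PRED_BUILTIN_EXPECT
   through *PREDICTOR.  (Illustrative sketch of the existing walk.)  */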

/* Return the constant EXPR is likely to have at execution time, or NULL
   if unknown.  The function is used by the builtin_expect branch predictor,
   so the evidence must come from that construct plus any additional
   constant folding that is possible.

   We may want to implement a more involved value guess (such as prediction
   based on value range propagation), but such tricks should go into a new
   implementation.  */

static tree
expr_expected_value (tree expr, bitmap visited,
		     enum br_predictor *predictor)
{
  enum tree_code code;
  tree op0, op1;

  if (TREE_CONSTANT (expr))
    {
      if (predictor)
	*predictor = PRED_UNCONDITIONAL;
      return expr;
    }

  extract_ops_from_tree (expr, &code, &op0, &op1);
  return expr_expected_value_1 (TREE_TYPE (expr),
				op0, code, op1, visited, predictor);
}

/* Predict branch probabilities using the opcode of the last statement in
   basic block BB.  */
static void
tree_predict_by_opcode (basic_block bb)
{
  gimple stmt = last_stmt (bb);
  edge then_edge;
  tree op0, op1;
  tree type;
  tree val;
  enum tree_code cmp;
  bitmap visited;
  edge_iterator ei;
  enum br_predictor predictor;

  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
    return;
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;
  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);
  cmp = gimple_cond_code (stmt);
  type = TREE_TYPE (op0);
  visited = BITMAP_ALLOC (NULL);
  val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,
			       &predictor);
  BITMAP_FREE (visited);
  if (val && TREE_CODE (val) == INTEGER_CST)
    {
      if (predictor == PRED_BUILTIN_EXPECT)
	{
	  int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);

	  gcc_assert (percent >= 0 && percent <= 100);
	  if (integer_zerop (val))
	    percent = 100 - percent;
	  predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
	}
      else
	predict_edge (then_edge, predictor,
		      integer_zerop (val) ? NOT_TAKEN : TAKEN);
    }
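
  /* Worked example: with the default --param builtin-expect-probability=90
     and REG_BR_PROB_BASE == 10000, __builtin_expect (cond, 0) predicts the
     THEN edge with hitrate HITRATE (100 - 90) == 1000, i.e. taken roughly
     10% of the time.  (Arithmetic illustration of the code above.)  */
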
  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (POINTER_TYPE_P (type))
    {
      if (cmp == EQ_EXPR)
	predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
      else if (cmp == NE_EXPR)
	predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
    }
  else
  /* Try "opcode heuristic."
     EQ tests are usually false and NE tests are usually true. Also,
     most quantities are positive, so we can make the appropriate guesses
     about signed comparisons against zero.  */
    switch (cmp)
      {
      case EQ_EXPR:
      case UNEQ_EXPR:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of == tests in
	   FP code.  */
	if (FLOAT_TYPE_P (type))
	  ;
	/* Comparisons with 0 are often used for booleans, and there is
	   nothing useful to predict about them.  */
	else if (integer_zerop (op0) || integer_zerop (op1))
	  ;
	else
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
	break;

      case NE_EXPR:
      case LTGT_EXPR:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of == tests in
	   FP code.  */
	if (FLOAT_TYPE_P (type))
	  ;
	/* Comparisons with 0 are often used for booleans, and there is
	   nothing useful to predict about them.  */
	else if (integer_zerop (op0)
		 || integer_zerop (op1))
	  ;
	else
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
	break;

      case ORDERED_EXPR:
	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
	break;

      case UNORDERED_EXPR:
	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
	break;

      case LE_EXPR:
      case LT_EXPR:
	if (integer_zerop (op1)
	    || integer_onep (op1)
	    || integer_all_onesp (op1)
	    || real_zerop (op1)
	    || real_onep (op1)
	    || real_minus_onep (op1))
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
	break;

      case GE_EXPR:
      case GT_EXPR:
	if (integer_zerop (op1)
	    || integer_onep (op1)
	    || integer_all_onesp (op1)
	    || real_zerop (op1)
	    || real_onep (op1)
	    || real_minus_onep (op1))
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
	break;

      default:
	break;
      }
}

/* Try to guess whether the return value VAL represents an error code.  */

static enum br_predictor
return_prediction (tree val, enum prediction *prediction)
{
  /* VOID.  */
  if (!val)
    return PRED_NO_PREDICTION;
  /* Different heuristics for pointers and scalars.  */
  if (POINTER_TYPE_P (TREE_TYPE (val)))
    {
      /* NULL is usually not returned.  */
      if (integer_zerop (val))
	{
	  *prediction = NOT_TAKEN;
	  return PRED_NULL_RETURN;
	}
    }
  else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
    {
      /* Negative return values are often used to indicate
         errors.  */
      if (TREE_CODE (val) == INTEGER_CST
	  && tree_int_cst_sgn (val) < 0)
	{
	  *prediction = NOT_TAKEN;
	  return PRED_NEGATIVE_RETURN;
	}
      /* Constant return values seem to be commonly taken.
         Zero/one often represent booleans, so exclude them from the
	 heuristics.  */
      if (TREE_CONSTANT (val)
	  && (!integer_zerop (val) && !integer_onep (val)))
	{
	  *prediction = TAKEN;
	  return PRED_CONST_RETURN;
	}
    }
  return PRED_NO_PREDICTION;
}
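
/* For example, "return NULL;" matches PRED_NULL_RETURN, "return -1;"
   matches PRED_NEGATIVE_RETURN, and "return 42;" matches
   PRED_CONST_RETURN, while "return 0;" and "return 1;" are deliberately
   left unpredicted because they usually represent booleans.  */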

/* Find the basic block containing the return expression, look up the
   possible return value, and try to apply the return-prediction
   heuristics.  */
static void
apply_return_prediction (void)
{
  gimple return_stmt = NULL;
  tree return_val;
  edge e;
  gimple phi;
  int phi_num_args, i;
  enum br_predictor pred;
  enum prediction direction;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      return_stmt = last_stmt (e->src);
      if (return_stmt
	  && gimple_code (return_stmt) == GIMPLE_RETURN)
	break;
    }
  if (!e)
    return;
  return_val = gimple_return_retval (return_stmt);
  if (!return_val)
    return;
  if (TREE_CODE (return_val) != SSA_NAME
      || !SSA_NAME_DEF_STMT (return_val)
      || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
    return;
  phi = SSA_NAME_DEF_STMT (return_val);
  phi_num_args = gimple_phi_num_args (phi);
  pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);

  /* Avoid the degenerate case where all return values from the function
     belong to the same category (i.e. they are all positive constants),
     in which case we can hardly say anything useful about them.  */
  for (i = 1; i < phi_num_args; i++)
    if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
      break;
  if (i != phi_num_args)
    for (i = 0; i < phi_num_args; i++)
      {
	pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
	if (pred != PRED_NO_PREDICTION)
	  predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
				         direction);
      }
}

/* Look for basic blocks that contain events unlikely to happen
   (such as noreturn calls) and mark all paths leading to execution
   of these basic blocks as unlikely.  */

static void
tree_bb_level_predictions (void)
{
  basic_block bb;
  bool has_return_edges = false;
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
      {
        has_return_edges = true;
	break;
      }

  apply_return_prediction ();

  FOR_EACH_BB_FN (bb, cfun)
    {
      gimple_stmt_iterator gsi;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree decl;

	  if (is_gimple_call (stmt))
	    {
	      if ((gimple_call_flags (stmt) & ECF_NORETURN)
	          && has_return_edges)
		predict_paths_leading_to (bb, PRED_NORETURN,
					  NOT_TAKEN);
	      decl = gimple_call_fndecl (stmt);
	      if (decl
		  && lookup_attribute ("cold",
				       DECL_ATTRIBUTES (decl)))
		predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
					  NOT_TAKEN);
	    }
	  else if (gimple_code (stmt) == GIMPLE_PREDICT)
	    {
	      predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
					gimple_predict_outcome (stmt));
	      /* Keep GIMPLE_PREDICT around so early inlining will propagate
	         hints to callers.  */
	    }
	}
    }
}
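
/* For instance, in

       if (fail)
	 abort ();

   the call to abort () is ECF_NORETURN, so every path leading to it is
   predicted not taken by PRED_NORETURN, and a call to a function marked
   __attribute__((cold)) gets the same treatment via PRED_COLD_FUNCTION.
   (Source-level illustration.)  */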

#ifdef ENABLE_CHECKING

/* Callback for hash_map::traverse, asserts that the pointer map is
   empty.  */

bool
assert_is_empty (const_basic_block const &, edge_prediction *const &value,
		 void *)
{
  gcc_assert (!value);
  return false;
}
#endif

/* Predict branch probabilities and estimate profile for basic block BB.  */

static void
tree_estimate_probability_bb (basic_block bb)
{
  edge e;
  edge_iterator ei;
  gimple last;

  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      /* Predict edges to user labels with attributes.  */
      if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
	{
	  gimple_stmt_iterator gi;
	  for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
	    {
	      gimple stmt = gsi_stmt (gi);
	      tree decl;

	      if (gimple_code (stmt) != GIMPLE_LABEL)
		break;
	      decl = gimple_label_label (stmt);
	      if (DECL_ARTIFICIAL (decl))
		continue;

	      /* Finally, we have a user-defined label.  */
	      if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
		predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
	      else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
		predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
	    }
	}

      /* Predict early returns to be probable, as we've already taken
	 care of error returns and the other cases are often used for
	 fast paths through the function.

	 Since we've already removed the return statements, we are
	 looking for a CFG like:

	 if (conditional)
	 {
	 ..
	 goto return_block
	 }
	 some other blocks
	 return_block:
	 return_stmt.  */
      if (e->dest != bb->next_bb
	  && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
	  && single_succ_p (e->dest)
	  && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
	  && (last = last_stmt (e->dest)) != NULL
	  && gimple_code (last) == GIMPLE_RETURN)
	{
	  edge e1;
	  edge_iterator ei1;

	  if (single_succ_p (bb))
	    {
	      FOR_EACH_EDGE (e1, ei1, bb->preds)
		if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
		    && !predicted_by_p (e1->src, PRED_CONST_RETURN)
		    && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
		  predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
	    }
	  else
	    if (!predicted_by_p (e->src, PRED_NULL_RETURN)
		&& !predicted_by_p (e->src, PRED_CONST_RETURN)
		&& !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
	      predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
	}

      /* Look for a block we are guarding (i.e. we dominate it,
	 but it doesn't postdominate us).  */
      if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
	  && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
	  && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
	{
	  gimple_stmt_iterator bi;

	  /* The call heuristic claims that a guarded function call
	     is improbable.  This is because such calls are often used
	     to signal exceptional situations such as printing error
	     messages.  */
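	  /* E.g. in "if (err) report_error (err);" the edge into the
	     block with the guarded call is predicted not taken.
	     (Source-level illustration; report_error stands for any
	     call with side effects.)  */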
	  for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
	       gsi_next (&bi))
	    {
	      gimple stmt = gsi_stmt (bi);
	      if (is_gimple_call (stmt)
		  /* Const and pure calls are hardly used to signal
		     something exceptional.  */
		  && gimple_has_side_effects (stmt))
		{
		  predict_edge_def (e, PRED_CALL, NOT_TAKEN);
		  break;
		}
	    }
	}
    }
  tree_predict_by_opcode (bb);
}

/* Predict branch probabilities and estimate profile of the tree CFG.
   This function can be called from the loop optimizers to recompute
   the profile information.  */

void
tree_estimate_probability (void)
{
  basic_block bb;

  add_noreturn_fake_exit_edges ();
  connect_infinite_loops_to_exit ();
  /* We use loop_niter_by_eval, which requires that the loops have
     preheaders.  */
  create_preheaders (CP_SIMPLE_PREHEADERS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
  tree_bb_level_predictions ();
  record_loop_exits ();

  if (number_of_loops (cfun) > 1)
    predict_loops ();

  FOR_EACH_BB_FN (bb, cfun)
    tree_estimate_probability_bb (bb);

  FOR_EACH_BB_FN (bb, cfun)
    combine_predictions_for_bb (bb);

#ifdef ENABLE_CHECKING
  bb_predictions->traverse<void *, assert_is_empty> (NULL);
#endif
  delete bb_predictions;
  bb_predictions = NULL;

  estimate_bb_frequencies (false);
  free_dominance_info (CDI_POST_DOMINATORS);
  remove_fake_exit_edges ();
}

/* Predict edges into CUR whose sources are not postdominated by BB
   with predictor PRED, and recurse to all postdominator sons of CUR.  */

static void
predict_paths_for_bb (basic_block cur, basic_block bb,
		      enum br_predictor pred,
		      enum prediction taken,
		      bitmap visited)
{
  edge e;
  edge_iterator ei;
  basic_block son;

  /* We are looking for all edges forming the edge cut induced by the
     set of all blocks postdominated by BB.  */
  FOR_EACH_EDGE (e, ei, cur->preds)
    if (e->src->index >= NUM_FIXED_BLOCKS
	&& !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
    {
      edge e2;
      edge_iterator ei2;
      bool found = false;

      /* Ignore fake and EH edges; we predict them as not taken anyway.  */
      if (e->flags & (EDGE_EH | EDGE_FAKE))
	continue;
      gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));

      /* See if there is an edge from e->src that is not abnormal
	 and does not lead to BB.  */
      FOR_EACH_EDGE (e2, ei2, e->src->succs)
	if (e2 != e
	    && !(e2->flags & (EDGE_EH | EDGE_FAKE))
	    && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
	  {
	    found = true;
	    break;
	  }

      /* If there is a non-abnormal path leaving e->src, predict the edge
	 using the predictor.  Otherwise we need to look for paths
	 leading to e->src.

	 The latter may lead to an infinite loop when we are predicting
	 regions that are only reachable by abnormal edges.  We simply
	 prevent visiting a given BB twice.  */
      if (found)
        predict_edge_def (e, pred, taken);
      else if (bitmap_set_bit (visited, e->src->index))
	predict_paths_for_bb (e->src, e->src, pred, taken, visited);
    }
  for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    predict_paths_for_bb (son, bb, pred, taken, visited);
}

/* Set branch probabilities on all paths leading to BB according to
   predictor PRED and prediction TAKEN.  */

static void
predict_paths_leading_to (basic_block bb, enum br_predictor pred,
			  enum prediction taken)
{
  bitmap visited = BITMAP_ALLOC (NULL);
  predict_paths_for_bb (bb, bb, pred, taken, visited);
  BITMAP_FREE (visited);
}

/* Like predict_paths_leading_to, but take an edge instead of a basic
   block.  */

static void
predict_paths_leading_to_edge (edge e, enum br_predictor pred,
			       enum prediction taken)
{
  bool has_nonloop_edge = false;
  edge_iterator ei;
  edge e2;

  basic_block bb = e->src;
  FOR_EACH_EDGE (e2, ei, bb->succs)
    if (e2->dest != e->src && e2->dest != e->dest
	&& !(e->flags & (EDGE_EH | EDGE_FAKE))
	&& !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
      {
	has_nonloop_edge = true;
	break;
      }
  if (!has_nonloop_edge)
    {
      bitmap visited = BITMAP_ALLOC (NULL);
      predict_paths_for_bb (bb, bb, pred, taken, visited);
      BITMAP_FREE (visited);
    }
  else
    predict_edge_def (e, pred, taken);
}

/* This is used to carry information about basic blocks.  It is
   attached to the AUX field of the standard CFG block.  */

struct block_info
{
  /* Estimated frequency of execution of basic_block.  */
  sreal frequency;

  /* To keep queue of basic blocks to process.  */
  basic_block next;

  /* Number of predecessors we need to visit first.  */
  int npredecessors;
};

/* Similar information for edges.  */
struct edge_prob_info
{
  /* If the edge is a loopback edge, this is the probability that the
     edge will be reached given that the loop header is.  The estimated
     number of iterations of the loop can then be computed as
     1 / (1 - back_edge_prob).  */
  sreal back_edge_prob;
  /* True if the edge is a loopback edge in the natural loop.  */
  unsigned int back_edge:1;
};
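
/* Worked example: a loop whose latch edge is reached with probability
   0.9 relative to its header has back_edge_prob == 0.9, giving an
   estimated 1 / (1 - 0.9) = 10 iterations per entry into the loop.  */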

#define BLOCK_INFO(B)	((block_info *) (B)->aux)
#undef EDGE_INFO
#define EDGE_INFO(E)	((edge_prob_info *) (E)->aux)

/* Helper function for estimate_bb_frequencies.
   Propagate the frequencies in blocks marked in
   TOVISIT, starting in HEAD.  */

static void
propagate_freq (basic_block head, bitmap tovisit)
{
  basic_block bb;
  basic_block last;
  unsigned i;
  edge e;
  basic_block nextbb;
  bitmap_iterator bi;

  /* For each basic block we need to visit, count the number of its
     predecessors that have to be visited first.  */
  EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
    {
      edge_iterator ei;
      int count = 0;

      bb = BASIC_BLOCK_FOR_FN (cfun, i);

      FOR_EACH_EDGE (e, ei, bb->preds)
	{
	  bool visit = bitmap_bit_p (tovisit, e->src->index);

	  if (visit && !(e->flags & EDGE_DFS_BACK))
	    count++;
	  else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
	    fprintf (dump_file,
		     "Irreducible region hit, ignoring edge to %i->%i\n",
		     e->src->index, bb->index);
	}
      BLOCK_INFO (bb)->npredecessors = count;
      /* When the function never returns, we will never process the exit
	 block.  */
      if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
	bb->count = bb->frequency = 0;
    }

  memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
  last = head;
  for (bb = head; bb; bb = nextbb)
    {
      edge_iterator ei;
      sreal cyclic_probability, frequency;

      memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
      memcpy (&frequency, &real_zero, sizeof (real_zero));

      nextbb = BLOCK_INFO (bb)->next;
      BLOCK_INFO (bb)->next = NULL;

      /* Compute frequency of basic block.  */
      if (bb != head)
	{
#ifdef ENABLE_CHECKING
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
			|| (e->flags & EDGE_DFS_BACK));
#endif

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_INFO (e)->back_edge)
	      {
		sreal_add (&cyclic_probability, &cyclic_probability,
			   &EDGE_INFO (e)->back_edge_prob);
	      }
	    else if (!(e->flags & EDGE_DFS_BACK))
	      {
		sreal tmp;

		/*  frequency += (e->probability
				  * BLOCK_INFO (e->src)->frequency /
				  REG_BR_PROB_BASE);  */

		sreal_init (&tmp, e->probability, 0);
		sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
		sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
		sreal_add (&frequency, &frequency, &tmp);
	      }

	  if (sreal_compare (&cyclic_probability, &real_zero) == 0)
	    {
	      memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
		      sizeof (frequency));
	    }
	  else
	    {
	      if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
		{
		  memcpy (&cyclic_probability, &real_almost_one,
			  sizeof (real_almost_one));
		}

	      /* BLOCK_INFO (bb)->frequency = frequency
					      / (1 - cyclic_probability) */

	      sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
	      sreal_div (&BLOCK_INFO (bb)->frequency,
			 &frequency, &cyclic_probability);
	    }
	}

      bitmap_clear_bit (tovisit, bb->index);

      e = find_edge (bb, head);
      if (e)
	{
	  sreal tmp;

	  /* EDGE_INFO (e)->back_edge_prob
	     = ((e->probability * BLOCK_INFO (bb)->frequency)
	     / REG_BR_PROB_BASE); */

	  sreal_init (&tmp, e->probability, 0);
	  sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
	  sreal_mul (&EDGE_INFO (e)->back_edge_prob,
		     &tmp, &real_inv_br_prob_base);
	}

      /* Propagate to successor blocks.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (!(e->flags & EDGE_DFS_BACK)
	    && BLOCK_INFO (e->dest)->npredecessors)
	  {
	    BLOCK_INFO (e->dest)->npredecessors--;
	    if (!BLOCK_INFO (e->dest)->npredecessors)
	      {
		if (!nextbb)
		  nextbb = e->dest;
		else
		  BLOCK_INFO (last)->next = e->dest;

		last = e->dest;
	      }
	  }
    }
}
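
/* Worked example: for a diamond CFG where HEAD branches 50/50 to two
   arms that rejoin, the propagation above assigns frequency 1 to HEAD,
   0.5 to each arm, and 0.5 + 0.5 = 1 to the join block; a block whose
   back edges sum to cyclic probability 0.5 gets its incoming frequency
   divided by (1 - 0.5), i.e. doubled.  */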

/* Estimate frequencies in loops at the same nest level.  */

static void
estimate_loops_at_level (struct loop *first_loop)
{
  struct loop *loop;

  for (loop = first_loop; loop; loop = loop->next)
    {
      edge e;
      basic_block *bbs;
      unsigned i;
      bitmap tovisit = BITMAP_ALLOC (NULL);

      estimate_loops_at_level (loop->inner);

      /* Find current loop back edge and mark it.  */
      e = loop_latch_edge (loop);
      EDGE_INFO (e)->back_edge = 1;

      bbs = get_loop_body (loop);
      for (i = 0; i < loop->num_nodes; i++)
	bitmap_set_bit (tovisit, bbs[i]->index);
      free (bbs);
      propagate_freq (loop->header, tovisit);
      BITMAP_FREE (tovisit);
    }
}

/* Propagate frequencies through the structure of loops.  */

static void
estimate_loops (void)
{
  bitmap tovisit = BITMAP_ALLOC (NULL);
  basic_block bb;

  /* Start by estimating the frequencies in the loops.  */
  if (number_of_loops (cfun) > 1)
    estimate_loops_at_level (current_loops->tree_root->inner);

  /* Now propagate the frequencies through all the blocks.  */
  FOR_ALL_BB_FN (bb, cfun)
    {
      bitmap_set_bit (tovisit, bb->index);
    }
  propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
  BITMAP_FREE (tovisit);
}

/* Drop the profile for NODE to guessed, and update its frequency based on
   whether it is expected to be hot given the CALL_COUNT.  */

static void
drop_profile (struct cgraph_node *node, gcov_type call_count)
{
  struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
  /* In the case where this was called by another function with a
     dropped profile, call_count will be 0. Since there are no
     non-zero call counts to this function, we don't know for sure
     whether it is hot, and therefore it will be marked normal below.  */
  bool hot = maybe_hot_count_p (NULL, call_count);

  if (dump_file)
    fprintf (dump_file,
             "Dropping 0 profile for %s/%i. %s based on calls.\n",
             node->name (), node->order,
             hot ? "Function is hot" : "Function is normal");
  /* We only expect to miss profiles for functions that are reached
     via non-zero call edges in cases where the function may have
     been linked from another module or library (COMDATs and extern
     templates). See the comments below for handle_missing_profiles.
     Also, only warn in cases where the missing counts exceed the
     number of training runs. In certain cases with an execv followed
     by a no-return call the profile for the no-return call is not
     dumped and there can be a mismatch.  */
  if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
      && call_count > profile_info->runs)
    {
      if (flag_profile_correction)
        {
          if (dump_file)
            fprintf (dump_file,
                     "Missing counts for called function %s/%i\n",
                     node->name (), node->order);
        }
      else
        warning (0, "Missing counts for called function %s/%i",
                 node->name (), node->order);
    }

  profile_status_for_fn (fn)
      = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
  node->frequency
      = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
}

/* In the case of COMDAT routines, multiple object files will contain the same
   function and the linker will select one for the binary. In that case
   all the other copies from the profile-instrumented binary will be missing
   profile counts. Look for cases where this happened, due to non-zero
   call counts going to 0-count functions, and drop the profile to guessed
   so that we can use the estimated probabilities and avoid optimizing only
   for size.

   The other case where the profile may be missing is when the routine
   is not going to be emitted to the object file, e.g. for "extern template"
   class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
   all other cases of non-zero calls to 0-count functions.  */

void
handle_missing_profiles (void)
{
  struct cgraph_node *node;
  int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
  vec<struct cgraph_node *> worklist;
  worklist.create (64);

  /* See if a 0-count function has non-0-count callers.  In this case we
     lost some profile.  Drop its function profile to PROFILE_GUESSED.  */
  FOR_EACH_DEFINED_FUNCTION (node)
    {
      struct cgraph_edge *e;
      gcov_type call_count = 0;
      gcov_type max_tp_first_run = 0;
      struct function *fn = DECL_STRUCT_FUNCTION (node->decl);

      if (node->count)
        continue;
      for (e = node->callers; e; e = e->next_caller)
      {
        call_count += e->count;

	if (e->caller->tp_first_run > max_tp_first_run)
	  max_tp_first_run = e->caller->tp_first_run;
      }

      /* If the time profile is missing, assign the maximum that comes from
	 the caller functions.  */
      if (!node->tp_first_run && max_tp_first_run)
	node->tp_first_run = max_tp_first_run + 1;

      if (call_count
          && fn && fn->cfg
          && (call_count * unlikely_count_fraction >= profile_info->runs))
        {
          drop_profile (node, call_count);
          worklist.safe_push (node);
        }
    }

  /* Propagate the profile dropping to other 0-count COMDATs that are
     potentially called by COMDATs we already dropped the profile on.  */
  while (worklist.length () > 0)
    {
      struct cgraph_edge *e;

      node = worklist.pop ();
      for (e = node->callees; e; e = e->next_callee)
        {
          struct cgraph_node *callee = e->callee;
          struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);

          if (callee->count > 0)
            continue;
          if (DECL_COMDAT (callee->decl) && fn && fn->cfg
              && profile_status_for_fn (fn) == PROFILE_READ)
            {
              drop_profile (node, 0);
              worklist.safe_push (callee);
            }
        }
    }
  worklist.release ();
}

/* Convert counts measured by profile driven feedback to frequencies.
   Return nonzero iff there was any nonzero execution count.  */

int
counts_to_freqs (void)
{
  gcov_type count_max, true_count_max = 0;
  basic_block bb;

  /* Don't overwrite the estimated frequencies when the profile for
     the function is missing.  We may drop this function to PROFILE_GUESSED
     later in drop_profile ().  */
  if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
    return 0;

  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
    true_count_max = MAX (bb->count, true_count_max);

  count_max = MAX (true_count_max, 1);
  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
    bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;

  return true_count_max;
}
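
/* Worked example: with BB_FREQ_MAX == 10000, a block executed 500 times
   in a function whose hottest block executed 1000 times gets frequency
   (500 * 10000 + 1000 / 2) / 1000 == 5000, i.e. half of BB_FREQ_MAX.  */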

/* Return true if the function is likely to be expensive, so there is
   no point in optimizing the performance of its prologue or epilogue, or
   in inlining it, at the expense of code size growth.  THRESHOLD is the
   limit on the average number of instructions the function may execute
   and still be considered not expensive.  */

bool
expensive_function_p (int threshold)
{
  unsigned int sum = 0;
  basic_block bb;
  unsigned int limit;

  /* We cannot compute this accurately for large thresholds due to
     scaled frequencies.  */
  gcc_assert (threshold <= BB_FREQ_MAX);

  /* Frequencies are out of range.  This either means that the function
     contains an internal loop executing more than BB_FREQ_MAX times, or
     that profile feedback is available and the function has not been
     executed at all.  */
  if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
    return true;

  /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
  limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	if (active_insn_p (insn))
	  {
	    sum += bb->frequency;
	    if (sum > limit)
	      return true;
	  }
    }

  return false;
}
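
/* Worked example: with THRESHOLD == 100, the function is considered
   expensive once the frequency-weighted count of its active insns
   exceeds 100 times the entry block frequency, i.e. once it executes
   more than roughly 100 instructions per invocation on average.  */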

/* Estimate and propagate basic block frequencies using the given branch
   probabilities.  If FORCE is true, the frequencies are used to estimate
   the counts even when there are already non-zero profile counts.  */

void
estimate_bb_frequencies (bool force)
{
  basic_block bb;
  sreal freq_max;

  if (force || profile_status_for_fn (cfun) != PROFILE_READ
      || !counts_to_freqs ())
    {
      static int real_values_initialized = 0;

      if (!real_values_initialized)
        {
	  real_values_initialized = 1;
	  sreal_init (&real_zero, 0, 0);
	  sreal_init (&real_one, 1, 0);
	  sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
	  sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
	  sreal_init (&real_one_half, 1, -1);
	  sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
	  sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
	}

      mark_dfs_back_edges ();

      single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
	 REG_BR_PROB_BASE;

      /* Set up block info for each basic block.  */
      alloc_aux_for_blocks (sizeof (block_info));
      alloc_aux_for_edges (sizeof (edge_prob_info));
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
	{
	  edge e;
	  edge_iterator ei;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
	      sreal_mul (&EDGE_INFO (e)->back_edge_prob,
			 &EDGE_INFO (e)->back_edge_prob,
			 &real_inv_br_prob_base);
	    }
	}

      /* First compute frequencies locally for each loop from innermost
         to outermost to examine frequencies for back edges.  */
      estimate_loops ();

      memcpy (&freq_max, &real_zero, sizeof (real_zero));
      FOR_EACH_BB_FN (bb, cfun)
	if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
	  memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));

      sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
	{
	  sreal tmp;

	  sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
	  sreal_add (&tmp, &tmp, &real_one_half);
	  bb->frequency = sreal_to_int (&tmp);
	}

      free_aux_for_blocks ();
      free_aux_for_edges ();
    }
  compute_function_frequency ();
}

/* Decide whether the function is hot, cold, or unlikely executed.  */
void
compute_function_frequency (void)
{
  basic_block bb;
  struct cgraph_node *node = cgraph_node::get (current_function_decl);

  if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
      || MAIN_NAME_P (DECL_NAME (current_function_decl)))
    node->only_called_at_startup = true;
  if (DECL_STATIC_DESTRUCTOR (current_function_decl))
    node->only_called_at_exit = true;

  if (profile_status_for_fn (cfun) != PROFILE_READ)
    {
      int flags = flags_from_decl_or_type (current_function_decl);
      if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
	  != NULL)
        node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
      else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
	       != NULL)
        node->frequency = NODE_FREQUENCY_HOT;
      else if (flags & ECF_NORETURN)
        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
	       || DECL_STATIC_DESTRUCTOR (current_function_decl))
        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      return;
    }

  /* Only the first time around do we try to drop the function into the
     unlikely executed category; after inlining, roundoff errors may
     confuse us.  The ipa-profile pass will drop functions only called
     from unlikely functions to unlikely, and that is most of what we
     care about.  */
  if (!cfun->after_inlining)
    node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
  FOR_EACH_BB_FN (bb, cfun)
    {
      if (maybe_hot_bb_p (cfun, bb))
	{
	  node->frequency = NODE_FREQUENCY_HOT;
	  return;
	}
      if (!probably_never_executed_bb_p (cfun, bb))
	node->frequency = NODE_FREQUENCY_NORMAL;
    }
}

/* Build PREDICT_EXPR.  */
tree
build_predict_expr (enum br_predictor predictor, enum prediction taken)
{
  tree t = build1 (PREDICT_EXPR, void_type_node,
		   build_int_cst (integer_type_node, predictor));
  SET_PREDICT_EXPR_OUTCOME (t, taken);
  return t;
}
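
/* Usage sketch (an assumption, not a call site in this file): a caller
   can materialize a hint with, e.g.,
       t = build_predict_expr (PRED_HOT_LABEL, TAKEN);
   and the gimplifier later turns the PREDICT_EXPR into a GIMPLE_PREDICT
   statement, which tree_bb_level_predictions above consumes.  */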

/* Return the human-readable name of the predictor PREDICTOR.  */

const char *
predictor_name (enum br_predictor predictor)
{
  return predictor_info[predictor].name;
}

/* Predict branch probabilities and estimate profile of the tree CFG. */

namespace {

const pass_data pass_data_profile =
{
  GIMPLE_PASS, /* type */
  "profile_estimate", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_BRANCH_PROB, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_profile : public gimple_opt_pass
{
public:
  pass_profile (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_profile, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_guess_branch_prob; }
  virtual unsigned int execute (function *);

}; // class pass_profile

unsigned int
pass_profile::execute (function *fun)
{
  unsigned nb_loops;

  loop_optimizer_init (LOOPS_NORMAL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    flow_loops_dump (dump_file, NULL, 0);

  mark_irreducible_loops ();

  nb_loops = number_of_loops (fun);
  if (nb_loops > 1)
    scev_initialize ();

  tree_estimate_probability ();

  if (nb_loops > 1)
    scev_finalize ();

  loop_optimizer_finalize ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    gimple_dump_cfg (dump_file, dump_flags);
  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
    profile_status_for_fn (fun) = PROFILE_GUESSED;
  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_profile (gcc::context *ctxt)
{
  return new pass_profile (ctxt);
}

namespace {

const pass_data pass_data_strip_predict_hints =
{
  GIMPLE_PASS, /* type */
  "*strip_predict_hints", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_BRANCH_PROB, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_strip_predict_hints : public gimple_opt_pass
{
public:
  pass_strip_predict_hints (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
  virtual unsigned int execute (function *);

}; // class pass_strip_predict_hints

/* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
   we no longer need.  */
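/* E.g. "tmp = __builtin_expect (x, 1); if (tmp) ..." becomes
   "tmp = x; if (tmp) ..." once the hint has been consumed by branch
   prediction.  (Source-level illustration of the rewrite below.)  */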
unsigned int
pass_strip_predict_hints::execute (function *fun)
{
  basic_block bb;
  gimple ass_stmt;
  tree var;

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator bi;
      for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
	{
	  gimple stmt = gsi_stmt (bi);

	  if (gimple_code (stmt) == GIMPLE_PREDICT)
	    {
	      gsi_remove (&bi, true);
	      continue;
	    }
	  else if (is_gimple_call (stmt))
	    {
	      tree fndecl = gimple_call_fndecl (stmt);

	      if ((fndecl
		   && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
		   && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
		   && gimple_call_num_args (stmt) == 2)
		  || (gimple_call_internal_p (stmt)
		      && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
		{
		  var = gimple_call_lhs (stmt);
		  if (var)
		    {
		      ass_stmt
			= gimple_build_assign (var, gimple_call_arg (stmt, 0));
		      gsi_replace (&bi, ass_stmt, true);
		    }
		  else
		    {
		      gsi_remove (&bi, true);
		      continue;
		    }
		}
	    }
	  gsi_next (&bi);
	}
    }
  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_strip_predict_hints (gcc::context *ctxt)
{
  return new pass_strip_predict_hints (ctxt);
}

/* Rebuild function frequencies.  Passes are in general expected to
   maintain the profile by hand, but in some cases this is not possible:
   for example, when inlining several functions with loops, frequencies
   might run out of scale and thus need to be recomputed.  */

void
rebuild_frequencies (void)
{
  timevar_push (TV_REBUILD_FREQUENCIES);

  /* When the max bb count in the function is small, there is a higher
     chance that there were truncation errors in the integer scaling
     of counts by inlining and other optimizations. This could lead
     to incorrect classification of code as being cold when it isn't.
     In that case, force the estimation of bb counts/frequencies from the
     branch probabilities, rather than computing frequencies from counts,
     which may also lead to frequencies incorrectly reduced to 0. There
     is less precision in the probabilities, so we only do this for small
     max counts.  */
  gcov_type count_max = 0;
  basic_block bb;
  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
    count_max = MAX (bb->count, count_max);

  if (profile_status_for_fn (cfun) == PROFILE_GUESSED
      || (profile_status_for_fn (cfun) == PROFILE_READ
	  && count_max < REG_BR_PROB_BASE / 10))
    {
      loop_optimizer_init (0);
      add_noreturn_fake_exit_edges ();
      mark_irreducible_loops ();
      connect_infinite_loops_to_exit ();
      estimate_bb_frequencies (true);
      remove_fake_exit_edges ();
      loop_optimizer_finalize ();
    }
  else if (profile_status_for_fn (cfun) == PROFILE_READ)
    counts_to_freqs ();
  else
    gcc_unreachable ();
  timevar_pop (TV_REBUILD_FREQUENCIES);
}