Predator  [unstable] git snapshot
storage.hh
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2009 Kamil Dudka <kdudka@redhat.com>
3  *
4  * This file is part of predator.
5  *
6  * predator is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * any later version.
10  *
11  * predator is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with predator. If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #ifndef H_GUARD_STORAGE_H
21 #define H_GUARD_STORAGE_H
22 
23 #include "code_listener.h"
24 
25 #include <map>
26 #include <set>
27 #include <string>
28 #include <vector>
29 
30 #ifndef BUILDING_DOX
31 # define STD_VECTOR(type) std::vector<type>
32 #else
33  // this would cause a lot of syntax errors with any parser that takes C++
34  // seriously but doxygen does not complain and produces way better output
35 # define STD_VECTOR(type) type[]
36 #endif
37 
38 /**
39  * @file storage.hh
40  * object model that describes the analyzed code on the input
41  */
42 
43 /**
44  * object model that describes the analyzed code on the input
45  */
46 namespace CodeStorage {
47 
48 struct Insn;
49 
50 /**
51  * high-level variable (operand) classification
52  */
53 enum EVar {
54  VAR_VOID, ///< this should be used only internally
55  VAR_GL, ///< global (may be also static) variable
56  VAR_LC, ///< local variable (not valid beyond a function)
57  VAR_FNC_ARG ///< fnc argument (sort of local variable also)
58 };
59 
60 /**
61  * high-level variable representation
62  */
63 struct Var {
64  EVar code; ///< high-level kind of variable
65  struct cl_loc loc; ///< location of its declaration
66 
67  /**
68  * type of the variable
69  * @note This often differs from type of the operand given to constructor!
70  */
71  const struct cl_type *type;
72 
73  /**
74  * unique ID of variable
75  * @attention not guaranteed to be unique beyond the scope of variable
76  */
77  int uid;
78 
79  /**
80  * name of the variable, empty string for anonymous variables
81  */
82  std::string name;
83 
84  /**
85  * initializer (optional)
86  */
87  std::vector<const Insn *> initials;
88 
89  /**
90  * if true, the variable is initialized; in particular it means that whole
91  * contents of the variable is nullified unless said otherwise (and yes,
92  * even if initials is an empty list)
93  *
94  * @todo a better API for initializers?
95  */
97 
98  /**
99  * true if the variable is external (defined in another module)
100  */
101  bool isExtern;
102 
103  /**
104  * true if there is at least one instruction in the program that takes an
105  * address of the variable (or some part of it)
106  */
108 
109  /**
110  * dummy constructor
111  * @note known to be useful for internal purposes only
112  */
113  Var();
114  ~Var();
115 
116  /**
117  * wrap low-level operand to Var object
118  * @param code kind of variable
119  * @param op Pointer to low-level operand to be used for Var construction.
120  */
121  Var(EVar code, const struct cl_operand *op);
122 };
123 
124 bool isOnStack(const Var &);
125 
126 /**
127  * lookup container for set of Var objects
128  */
129 class VarDb {
130  private:
131  typedef STD_VECTOR(Var) TList;
132 
133  public:
135  typedef const_iterator iterator;
136 
137  public:
138  VarDb();
139  ~VarDb();
140 
141  /**
142  * look for a Var object by ID, add one if not found
143  * @param uid ID of variable to look for
144  * @return reference to either a found or just created Var object
145  */
146  Var& operator[](int uid);
147 
148  /**
149  * look for a Var object by ID, crash if not found
150  * @attention It is not safe to look for non-existing ID, it will jump
151  * to debugger in that case.
152  * @param uid ID of variable to look for
153  * @return reference to the found Var object
154  */
155  const Var& operator[](int uid) const;
156 
157  /**
158  * return STL-like iterator to go through the container
159  */
160  const_iterator begin() const { return vars_.begin(); }
161 
162  /**
163  * return STL-like iterator to go through the container
164  */
165  const_iterator end() const { return vars_.end(); }
166 
167  /**
168  * return count of variables stored in the container
169  */
170  size_t size() const { return vars_.size(); }
171 
172  private:
173  /// @b not allowed to be copied
174  VarDb(const VarDb &);
175 
176  /// @b not allowed to be copied
177  VarDb& operator=(const VarDb &);
178 
179  private:
181  struct Private;
182  Private *d;
183 };
184 
185 /**
186  * type lookup table
187  * @attention Type objects are not cloned for now (sort of exception).
188  * @todo Check if there is a front-end which really needs types to be cloned.
189  */
190 class TypeDb {
191  private:
192  typedef STD_VECTOR(const struct cl_type *) TList;
193 
194  public:
196  typedef const_iterator iterator;
197 
198  public:
199  TypeDb();
200  ~TypeDb();
201 
202  /**
203  * index given type for lookup
204  * @note useful only for builder
205  * @return true if the type was really added, false if it has been
206  * already there
207  */
208  bool insert(const struct cl_type *);
209 
210  /**
211  * type lookup by ID
212  * @attention The pointer returned is the same as formerly given to
213  * insert(). There is no cloning performed since it hasn't been
214  * considered useful for now.
215  */
216  const struct cl_type* operator[](int) const;
217 
218  /**
219  * return STL-like iterator to go through the container
220  */
221  const_iterator begin() const { return types_.begin(); }
222 
223  /**
224  * return STL-like iterator to go through the container
225  */
226  const_iterator end() const { return types_.end(); }
227 
228  /**
229  * return count of types stored in the container
230  */
231  size_t size() const { return types_.size(); }
232 
233  /**
234  * value of sizeof(void (*)()) in the analysed program, -1 if such
235  * information is not available
236  */
237  int codePtrSizeof() const;
238 
239  /**
240  * value of sizeof(void *) in the analysed program, -1 if such
241  * information is not available
242  */
243  int dataPtrSizeof() const;
244 
245  /// a (void *) type if available; if not, any data pointer; 0 otherwise
246  const struct cl_type* genericDataPtr() const;
247 
248  private:
249  /// @b not allowed to be copied
250  TypeDb(const TypeDb &);
251 
252  /// @b not allowed to be copied
253  TypeDb& operator=(const TypeDb &);
254 
255  private:
257  struct Private;
258  Private *d;
259 };
260 
261 /**
262  * Add the given type into TypeDb instance, then descent into the type and add
263  * all the referred types recursively.
264  * @param db TypeDb object to add all types to
265  * @param clt an arbitrary code listener type to be read
266  */
267 void readTypeTree(TypeDb &db, const struct cl_type *clt);
268 
269 /**
270  * name to UID mapping for global/static symbols
271  */
272 struct NameDb {
273  typedef std::map<std::string, int /* uid */> TNameMap;
274  typedef std::map<std::string, TNameMap> TFileMap;
275 
278 };
279 
280 class Block;
281 class ControlFlow;
282 struct Storage;
283 
284 /**
285  * generic STL-based list of cl_operand objects.
286  * They may or may not be deep-cloned, it depends on the particular purpose.
287  */
289 
290 /**
291  * generic STL-based list of Block pointers (useful to build CFG from Block
292  * objects)
293  */
294 typedef STD_VECTOR(const Block *) TTargetList;
295 
296 /**
297  * value type describing a single 'kill variable' hint
298  */
299 struct KillVar {
300  /// CodeStorage uid of the variable to kill
301  int uid;
302 
303  /// if true, killing the variable is safe only if nobody points at/inside it
305 
306  KillVar(int uid_, bool onlyIfNotPointed_):
307  uid(uid_),
308  onlyIfNotPointed(onlyIfNotPointed_)
309  {
310  }
311 };
312 
313 /// FIXME: inserting <id,0> and <id,1> could break the consistency of std::set
314 struct ltKillVar {
315  bool operator()(const KillVar &a, const KillVar &b) {
316  return a.uid < b.uid;
317  }
318 };
319 
320 /// list of variables to kill (a @b kill @b list)
321 typedef std::set<KillVar, ltKillVar> TKillVarList;
322 
323 /// list of kill lists (one per each target of a terminal instruction)
324 typedef std::vector<TKillVarList> TKillPerTarget;
325 
326 /**
327  * high-level representation of an intermediate code instruction
328  */
329 struct Insn {
330  /**
331  * instance of Storage which owns the Insn object
332  */
334 
335  /**
336  * reference to a basic block that owns the Insn object
337  */
339 
340  /**
341  * kind of instruction, see #cl_insn_e documentation
342  * @note now there can be also CL_INSN_CALL and CL_INSN_SWITCH
343  */
345 
346  /**
347  * some extra instructions partitioning, for now used by CL_INSN_UNOP and
348  * CL_INSN_BINOP
349  * @note this suffers from a little type info lost since it represent both
350  * enumeral types #cl_unop_e and #cl_binop_e by an integer.
351  */
352  int subCode;
353 
354  /**
355  * corresponding location in the original source code
356  */
357  struct cl_loc loc;
358 
359  /**
360  * List of all operands used by the instruction.
361  * Their particular semantic is highly dependent on @b type @b of @b the
362  * @b instruction. Let's go to summarize it:
363  *
364  * - (@b 0 operands) @b CL_INSN_JMP
365  *
366  * - (@b 1 operands) @b CL_INSN_COND:
367  * - [@b 0] - value to branch by
368  *
369  * - (@b 1 operands) @b CL_INSN_RET:
370  * - [@b 0] - value to return from fnc
371  *
372  * - (@b 0 operands) @b CL_INSN_ABORT
373  *
374  * - (@b 2 operands) @b CL_INSN_UNOP:
375  * - [@b 0] - destination
376  * - [@b 1] - source
377  *
378  * - (@b 3 operands) @b CL_INSN_BINOP:
379  * - [@b 0] - destination
380  * - [@b 1] - source 1
381  * - [@b 2] - source 2
382  *
383  * - (@b 2+ operands) @b CL_INSN_CALL:
384  * - [@b 0] - destination
385  * - [@b 1] - what are we going to call (mostly a fnc)
386  * - [@b 2..@b n] - call arguments
387  *
388  * - (@b 1+ operands) @b CL_INSN_SWITCH:
389  * - [@b 0] - value to switch by
390  * - [@b 1..@b n] - case values
391  *
392  * - (@b 1 operand) @b CL_INSN_LABEL:
393  * - [@b 0] - name of the label
394  */
396 
397  /// list of variables you can safely kill @b after execution of the insn
399 
400  /**
401  * similar to varsToKill, but refined for @b terminal instructions with more
402  * than one target. List of kill lists, one kill list per each target of
403  * the terminal instruction. If a variable is already killed by varsToKill,
404  * it does not appear in killPerTarget again.
405  */
407 
408  /**
409  * List of all target blocks - useful only for @b terminal @b instructions.
410  * Their particular semantic is highly dependent on @b type @b of @b the
411  * @b instruction. Let's go to summarize it:
412  *
413  * - (@b 1 target) @b CL_INSN_JMP:
414  * - [@b 0] - target of jump
415  *
416  * - (@b 2 targets) @b CL_INSN_COND:
417  * - [@b 0] - where to jump if condition holds
418  * - [@b 1] - where to jump if condition does not hold
419  *
420  * - (@b 0 targets) @b CL_INSN_RET
421  *
422  * - (@b 0 targets) @b CL_INSN_ABORT
423  *
424  * - (@b 1+ targets) @b CL_INSN_SWITCH:
425  * - [@b 0] - where to jump by default
426  * - [@b 1..@b n] - where to jump in corresponding cases
427  */
429 
430  /// list of indexes of targets that are closing a loop at the CFG level
431  std::vector<unsigned> loopClosingTargets;
432 };
433 
434 /**
435  * Basic block - a single node in ControlFlow graph. Once the basic block is
436  * ready, it contains (possibly empty) sequence of non-terminating instructions
437  * and exactly one terminating instruction.
438  */
439 class Block {
440  private:
441  typedef STD_VECTOR(const Insn *) TList;
442 
443  public:
445  typedef const_iterator iterator;
446 
447  public:
448  /**
449  * constructor useful to place objects into std::vector, but do @b not
450  * try to call append() on objects constructed this way. It would crash
451  * on a NULL pointer dereference.
452  */
453  Block():
454  cfg_(0)
455  {
456  }
457 
458  /**
459  * created a named basic block
460  * @param cfg pointer to control flow graph where the block belongs to
461  * @param name name of the basic block being constructed (zero ended C
462  * string)
463  */
464  Block(ControlFlow *cfg, const char *name):
465  cfg_(cfg),
466  name_(name)
467  {
468  }
469 
470  // NOTE: there is no destructor ... given objects are NOT destroyed
471 
472  /**
473  * return name of the basic block
474  */
475  const std::string& name() const {
476  return name_;
477  }
478 
479  /**
480  * append a given instruction to end of the block
481  * @param insn Instruction to append.
482  * @note Given objects are not cloned nor destroyed!
483  */
484  void append(Insn *insn);
485 
486  void appendPredecessor(Block *);
487 
488  /**
489  * return list of all direct successors
490  */
491  const TTargetList& targets() const;
492 
493  /**
494  * return list of all direct predecessors
495  */
496  const TTargetList& inbound() const { return inbound_; }
497 
498  /**
499  * return STL-like iterator to go through all the instructions inside
500  */
501  const_iterator begin() const { return insns_.begin(); }
502 
503  /**
504  * return STL-like iterator to go through all the instructions inside
505  */
506  const_iterator end() const { return insns_.end(); }
507 
508  /**
509  * return the first instruction in the basic block
510  */
511  const Insn* front() const;
512 
513  /**
514  * return the last instruction in the basic block
515  * @note This should be a @b terminal instruction once the CodeStorage
516  * model has been built.
517  */
518  const Insn* back() const;
519 
520  /**
521  * return count of instructions inside the basic block
522  */
523  size_t size() const { return insns_.size(); }
524 
525  /**
526  * direct access to instruction by its index.
527  * @param idx Index of instruction to access (staring with zero).
528  * @attention There is no range check performed.
529  */
530  const Insn* operator[](unsigned idx) const { return insns_[idx]; }
531 
532  /**
533  * return the ControlFlow object which the Block belongs to
534  */
535  const ControlFlow* cfg() const { return cfg_; }
536 
537  /// return true, if a loop at the level of CFG starts with this block
538  bool isLoopEntry() const;
539 
540  private:
544  std::string name_;
545 };
546 
547 /**
548  * Control flow graph - an easy to analyse representation of the intermediate
549  * code. Nodes of the graph are basic blocks - instances of Block
550  * @todo support for (non-recursive) call graph unfolding?
551  */
552 class ControlFlow {
553  private:
554  typedef STD_VECTOR(Block *) TList;
555 
556  public:
558  typedef const_iterator iterator;
559 
560  public:
561  ControlFlow();
562  ~ControlFlow();
563  ControlFlow(const ControlFlow &); ///< shallow copy
564  ControlFlow& operator=(const ControlFlow &); ///< shallow copy
565 
566  /**
567  * return entry basic block
568  */
569  const Block *entry() const;
570 
571  /**
572  * look for a basic block by name, create one if not found
573  * @param name name of the basic block to look for
574  * @return referenced pointer to either a found or just created Block
575  * @attention created objects will @b not be destroyed automatically
576  */
577  Block*& operator[](const char *name);
578 
579  /**
580  * look for a basic block by name, crash if not found
581  * @attention It is not safe to look for a non-existing basic block, it
582  * will jump to debugger in that case.
583  * @param name name of the basic block to look for
584  * @return pointer to the found Var object
585  */
586  const Block* operator[](const char *name) const;
587 
588  /**
589  * return STL-like iterator to go through all basic blocks inside
590  */
591  const_iterator begin() const { return bbs_.begin(); }
592 
593  /**
594  * return STL-like iterator to go through all basic blocks inside
595  */
596  const_iterator end() const { return bbs_.end(); }
597 
598  /**
599  * return count of basic blocks inside the control flow graph
600  */
601  size_t size() const { return bbs_.size(); }
602 
603  private:
605  struct Private;
606  Private *d;
607 };
608 
609 typedef std::vector<int> TArgByPos;
610 typedef std::set<int> TVarSet;
611 
612 namespace CallGraph {
613  struct Node;
614 }
615 
616 struct Fnc;
617 
618 namespace PointsTo {
619 
620 class Node;
621 
622 typedef enum {
626 } ItemCodeE;
627 
628 class Item {
629  public:
630  Item(ItemCodeE);
631  Item(const Var *v);
632 
633  bool isGlobal() const;
634  int uid() const;
635  const char *name() const;
636  /**
637  * return CodeStorage::Var pointer if the item represents PT_ITEM_VAR
638  * variable, otherwise return NULL.
639  */
640  const Var *var() const;
641 
642  public:
644 
645  union {
646  // NULL for black-hole
647  const Var *var;
648  // valid only for ITEM_RET
649  const Fnc *fnc;
650  // valid only for PT_ITEM_MALLOC
651  int mallocId;
652  } data;
653 
654  private:
655  Item();
656 };
657 
658 typedef std::vector<const Item *> TItemList;
659 typedef std::set<Node *> TNodeList;
660 
661 class Node {
662  public:
663  Node();
664  ~Node();
665 
666  public:
667  /// list of variables assigned to the node
669  /// out-edges edges are stored inside
671  /// in-edges are just 'synced' pointers
673  /// there should be only one black-hole / graph
675 };
676 
677 // In some types of PT-graphs (e.g. graph constructed by FICS algorithm) we can
678 // expect that one variable may be placed at most in one node of graph
679 // structure -- actual implementation covers this situation.
680 // FIXME: this should be generalized for graphs where the same variable may be
681 // placed in more than one node of points-to graph.
682 typedef std::map<int, Node *> TMap;
683 
684 class Graph {
685  public:
686  Graph(Fnc *fnc_ = 0) :
687  fnc(fnc_),
688  blackHole(0)
689  {
690  }
691 
692  public:
693  /// map variable uid to Item object
694  std::map<int, const Item *> uidToItem;
695  /// map item uid to concrete node
697  /// backward reference to FncDb (NULL for global graph)
699  /// shortcuts to global variables occurring in this graph
701  /// has this graph only one all-variables eating node?
702  const Node *blackHole;
703 };
704 
705 class GlobalData {
706  public:
707  /// graph corresponding to global variables
709  /// points-to build status
710  bool dead;
711 
712  public:
714  dead(false)
715  {
716  }
717 };
718 
719 bool isDead(const GlobalData &);
720 
721 } // namespace PoinstTo
722 
723 /**
724  * function definition
725  */
726 struct Fnc {
727  struct cl_operand def; ///< definition as low-level operand
728  Storage *stor; ///< owning Storage object
729  TVarSet vars; ///< uids of variables used by the fnc
730  TArgByPos args; ///< args uid addressed by arg position
731  ControlFlow cfg; ///< fnc code as control flow graph
732  PointsTo::Graph ptg; ///< points-to graph
733  CallGraph::Node *cgNode; ///< pointer to call-graph node or NULL
734 
735  Fnc():
736  stor(0),
737  ptg(this),
738  cgNode(0)
739  {
741  }
742 };
743 
744 /**
745  * return the name of given Fnc object (if any)
746  */
747 const char* nameOf(const Fnc &);
748 
749 /**
750  * return the location info associated with the given Fnc object (if any)
751  */
752 const struct cl_loc* locationOf(const Fnc &);
753 
754 /**
755  * return the uid of given Fnc object
756  */
757 int uidOf(const Fnc &);
758 
759 /**
760  * return true if the function is defined (means we have its CFG and the like)
761  */
762 bool isDefined(const Fnc &);
763 
764 /**
765  * lookup container for set of Fnc objects
766  */
767 class FncDb {
768  private:
769  typedef STD_VECTOR(Fnc *) TList;
770 
771  public:
773  typedef const_iterator iterator;
774 
775  public:
776  FncDb();
777  ~FncDb();
778  FncDb(const FncDb &); ///< shallow copy
779  FncDb& operator=(const FncDb &); ///< shallow copy
780 
781  /**
782  * look for a function by ID, create one if not found
783  * @param uid ID of the function to look for
784  * @return referenced pointer to either a found or just created Fnc obj
785  * @attention created objects will @b not be destroyed automatically
786  */
787  Fnc*& operator[](int uid);
788 
789  /**
790  * look for a function by ID, crash if not found
791  * @attention It is not safe to look for a non-existing function, it
792  * will jump to debugger in that case.
793  * @param uid ID of the function to look for
794  * @return pointer to the found Fnc object
795  */
796  const Fnc* operator[](int uid) const;
797 
798  /**
799  * return STL-like iterator to go through all functions inside
800  */
801  const_iterator begin() const { return fncs_.begin(); }
802 
803  /**
804  * return STL-like iterator to go through all functions inside
805  */
806  const_iterator end() const { return fncs_.end(); }
807 
808  /**
809  * return count of functions inside the container
810  */
811  size_t size() const { return fncs_.size(); }
812 
813  private:
815  struct Private;
816  Private *d;
817 };
818 
819 typedef std::vector<const Fnc *> TFncList;
820 typedef std::vector<const Insn *> TInsnList;
821 typedef std::map<Fnc *, TInsnList> TInsnListByFnc;
822 
823 namespace CallGraph {
824 
825  /// call-graph node (there is a 1:1 relation with Fnc, if the CG is built)
826  struct Node {
828 
829  /// list of calls sorted by called fnc, zero key means an indirect call
831 
832  /// list of direct call instructions that call this function
834 
835  /// insns that take address of this function, zero key means initializer
837 
838  Node(Fnc *fnc_):
839  fnc(fnc_)
840  {
841  }
842  };
843 
844  typedef std::set<Node *> TNodeList;
845 
846  struct Graph {
849 
852 
854 
855  Graph():
856  hasIndirectCall(false),
857  hasCallback(false)
858  {
859  }
860  };
861 
862 } // namespace CallGraph
863 
864 /**
865  * a value type representing the @b whole @b serialised @b model of code
866  */
867 struct Storage {
868  TypeDb types; ///< type info lookup container
869  VarDb vars; ///< variables lookup container
870  FncDb fncs; ///< functions lookup container
871  NameDb varNames; ///< var names lookup container
872  NameDb fncNames; ///< fnc names lookup container
873  CallGraph::Graph callGraph; ///< call graph globals
874  PointsTo::GlobalData ptd; ///< global PT-info
875 };
876 
877 /// return the pointer to the Fnc object that the cfg instance is @b wrapped by
878 const Fnc* fncByCfg(const ControlFlow *cfg);
879 
880 } // namespace CodeStorage
881 
882 #endif /* H_GUARD_STORAGE_H */