//===-- UnixStreamChecker.cpp -----------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Defines a checker for proper use of open/creat/fdopen/close APIs.
//   - If a file has been closed with close, it should not be accessed again.
//   Accessing a closed file results in undefined behavior.
//   - If a file was opened with open/creat, it must be closed with close before
//   the execution ends. Failing to do so results in a resource leak.
//
//===----------------------------------------------------------------------===//

#include "ClangSACheckers.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/Analysis/Analyses/LiveVariables.h"
#include "clang/AST/Attr.h"

using namespace clang;
using namespace ento;

// Local variables whose initializer was judged dead by
// DeadStoreObs::observeStmt (entries are only ever inserted with the value
// 'true'). UnixStreamChecker::checkBind looks variables up here and erases an
// entry once it has reported on it; ~UnixStreamChecker() clears the whole map.
// NOTE(review): this is a mutable global with external linkage that is shared
// by every checker instance -- confirm that is intended.
llvm::DenseMap<const VarDecl *, bool> VDMap;

namespace {
class FindEscaped {
public:
  llvm::SmallPtrSet<const VarDecl *, 20> Escaped;

  void operator()(const Stmt *S) {
    // Check for '&'. Any VarDecl whose address has been taken we treat as
    // escaped.
    // FIXME: What about references?
    const UnaryOperator *U = dyn_cast<UnaryOperator>(S);
    if (!U)
      return;
    if (U->getOpcode() != UO_AddrOf)
      return;

    const Expr *E = U->getSubExpr()->IgnoreParenCasts();
    if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(E))
      if (const VarDecl *VD = dyn_cast<VarDecl>(DR->getDecl()))
        Escaped.insert(VD);
  }
};
} // end anonymous namespace

namespace {
/// Live-variables observer that looks for local variables whose initial value
/// is dead right after the declaration and records them in the global VDMap.
class DeadStoreObs : public LiveVariables::Observer {
  const CFG &cfg;
  ASTContext &Ctx;
  BugReporter &BR;
  AnalysisDeclContext *AC;
  ParentMap &Parents;
  // Copy of the address-taken set handed to the constructor.
  llvm::SmallPtrSet<const VarDecl *, 20> Escaped;
  // Block passed to the most recent observeStmt() call.
  const CFGBlock *currentBlock;
  enum DeadStoreKind { Standard, Enclosing, DeadIncrement, DeadInit };

public:
  DeadStoreObs(const CFG &cfg, ASTContext &ctx, BugReporter &br,
               AnalysisDeclContext *ac, ParentMap &parents,
               llvm::SmallPtrSet<const VarDecl *, 20> &escaped)
      : cfg(cfg), Ctx(ctx), BR(br), AC(ac), Parents(parents), Escaped(escaped),
        currentBlock(nullptr) {}

  virtual ~DeadStoreObs() {}

  /// Returns true when \p D is live according to \p Live.
  bool isLive(const LiveVariables::LivenessValues &Live, const VarDecl *D) {
    return Live.isLive(D);
  }

  /// Called for each statement by the live-variables analysis.
  ///
  /// Only declaration statements outside of macros are inspected. For every
  /// local variable with an initializer that is not live after the
  /// declaration (and is not marked 'unused'), VDMap[V] is set to true unless
  /// one of the special cases below applies. Note that several of those
  /// special cases stop processing the whole statement ('return'), not just
  /// the current declarator.
  virtual void observeStmt(const Stmt *S, const CFGBlock *block,
                           const LiveVariables::LivenessValues &Live) {
    currentBlock = block;

    // Statements that come from a macro expansion are skipped.
    if (S->getLocStart().isMacroID())
      return;

    const DeclStmt *DS = dyn_cast<DeclStmt>(S);
    if (!DS)
      return;

    for (DeclStmt::const_decl_iterator DI = DS->decl_begin(),
                                       DE = DS->decl_end();
         DI != DE; ++DI) {
      VarDecl *V = dyn_cast<VarDecl>(*DI);
      if (!V || !V->hasLocalStorage())
        continue;

      // Reference types confuse the dead stores logic; give up on the
      // statement for now.
      if (V->getType()->getAs<ReferenceType>())
        return;

      const Expr *Init = V->getInit();
      if (!Init)
        continue;

      // Look through any cleanup wrappers around the initializer.
      while (const ExprWithCleanups *Cleanups =
                 dyn_cast<ExprWithCleanups>(Init))
        Init = Cleanups->getSubExpr();

      // C++ objects are not handled (yet): their constructors/destructors
      // may have side effects.
      if (isa<CXXConstructExpr>(Init))
        return;

      // Only variables that are dead after initialization and are not
      // marked 'unused' are of interest.
      if (isLive(Live, V) || V->getAttr<UnusedAttr>())
        continue;

      // Initialization with a constant, e.g. 'int x = 0;', is commonly
      // defensive programming and is not recorded.
      if (Init->isEvaluatable(Ctx))
        return;

      if (const DeclRefExpr *DRE =
              dyn_cast<DeclRefExpr>(Init->IgnoreParenCasts())) {
        if (const VarDecl *Src = dyn_cast<VarDecl>(DRE->getDecl())) {
          // Initialization from a constant global, e.g.
          //   extern const int MyConstant;
          //   int x = MyConstant;
          const bool FromConstGlobal =
              Src->hasGlobalStorage() && Src->getType().isConstQualified();
          if (FromConstGlobal)
            return;

          // Initialization from a scalar parameter is often defensive
          // programming as well; non-scalars are still recorded.
          if (isa<ParmVarDecl>(Src) && Src->getType()->isScalarType())
            return;
        }
      }

      VDMap[V] = true;
    }
  }
};

} // end anonymous namespace

namespace {
/// Small list of symbols; used to carry the leaked stream symbols.
using SymbolVector = llvm::SmallVector<SymbolRef, 2>;

/// Tracked state of one stream symbol: opened, closed or escaped.
/// Instances are created only through the static get*() factories.
struct StreamState {
private:
  enum Kind { Opened, Closed, Escaped };
  Kind K;

  StreamState(Kind InK) : K(InK) {}

public:
  static StreamState getOpened() { return StreamState(Opened); }
  static StreamState getClosed() { return StreamState(Closed); }
  static StreamState getEscaped() { return StreamState(Escaped); }

  bool isOpened() const { return K == Opened; }
  bool isClosed() const { return K == Closed; }
  bool isEscaped() const { return K == Escaped; }

  /// Two states are equal when they have the same kind.
  bool operator==(const StreamState &X) const { return K == X.K; }

  /// Folds the kind into \p ID.
  void Profile(llvm::FoldingSetNodeID &ID) const { ID.AddInteger(K); }
};

/// Bug-report visitor that annotates the path with the points where the
/// tracked stream symbol became opened or closed.
class UnixStreamBugVisitor
    : public BugReporterVisitorImpl<UnixStreamBugVisitor> {

  // The stream symbol this visitor follows in the StreamMap.
  SymbolRef Sym;
  // When true, getEndPath() produces an end-of-path piece.
  bool IsLeak;

public:
  UnixStreamBugVisitor(SymbolRef S, bool isLeak = false)
      : Sym(S), IsLeak(isLeak) {}

  virtual ~UnixStreamBugVisitor() {}

  /// Identifies the visitor by its class (address of a function-local
  /// static) and by the tracked symbol.
  void Profile(llvm::FoldingSetNodeID &ID) const {
    static int X = 0;
    ID.AddPointer(&X);
    ID.AddPointer(Sym);
  }

  /// True when \p Stmt is a call and the symbol is opened in \p S while it
  /// was not opened (or not tracked) in \p SPrev.
  inline bool isOpened(const StreamState *S, const StreamState *SPrev,
                       const Stmt *Stmt) {
    if (!Stmt || !isa<CallExpr>(Stmt))
      return false;
    if (!S || !S->isOpened())
      return false;
    return !(SPrev && SPrev->isOpened());
  }

  /// True when \p Stmt is a call and the symbol is closed in \p S while it
  /// was not closed (or not tracked) in \p SPrev.
  inline bool isClosed(const StreamState *S, const StreamState *SPrev,
                       const Stmt *Stmt) {
    if (!Stmt || !isa<CallExpr>(Stmt))
      return false;
    if (!S || !S->isClosed())
      return false;
    return !(SPrev && SPrev->isClosed());
  }

  PathDiagnosticPiece *VisitNode(const ExplodedNode *N,
                                 const ExplodedNode *PrevN,
                                 BugReporterContext &BRC, BugReport &BR);

  /// For leak reports, builds an event piece located at the end of the path
  /// that carries the report description; returns null for other reports.
  std::unique_ptr<PathDiagnosticPiece>
  getEndPath(BugReporterContext &BRC, const ExplodedNode *EndPathNode,
             BugReport &BR) {
    if (!IsLeak)
      return nullptr;

    PathDiagnosticLocation EndLoc = PathDiagnosticLocation::createEndOfPath(
        EndPathNode, *(EndPathNode->getSourceManager()));
    return llvm::make_unique<PathDiagnosticEventPiece>(
        EndLoc, BR.getDescription(), EndPathNode->getSourceManager(), false);
  }
};

/// Path-sensitive checker that tracks stream symbols through
/// open/creat/fdopen and close/fclose/dup calls and reports double closes
/// and leaks. It also runs an AST-level pass (checkASTCodeBody) that fills
/// the global VDMap.
class UnixStreamChecker
    : public Checker<check::PostCall, check::PreCall, check::DeadSymbols,
                     check::Bind, check::ASTCodeBody> {

  // Identifiers of the recognised functions; filled in by
  // initIdentifierInfo() on first use.
  mutable IdentifierInfo *IIopen;
  mutable IdentifierInfo *IIcreat;
  mutable IdentifierInfo *II_fdopen;
  mutable IdentifierInfo *II_dup;
  mutable IdentifierInfo *IIfclose;
  mutable IdentifierInfo *IIclose;

  mutable std::unique_ptr<BugType> DoubleCloseBugType;
  mutable std::unique_ptr<BugType> LeakBugType;

  void initIdentifierInfo(ASTContext &Ctx) const;

  /// Emits a "double close" report for \p FileIDSym at the current call.
  void reportDoubleClose(SymbolRef FileIDSym, const CallEvent &Call,
                         CheckerContext &C) const;

  /// Emits one leak report per symbol in \p LeakedStreams at \p ErrNode.
  void reportLeaks(SymbolVector LeakedStreams, CheckerContext &C,
                   ExplodedNode *ErrNode) const;

public:
  UnixStreamChecker();

  /// Drops everything recorded in the global VDMap.
  ~UnixStreamChecker() { VDMap.clear(); }

  /// Creates a visitor following \p S.
  std::unique_ptr<UnixStreamBugVisitor>
  getUnixStreamBugVisitor(SymbolRef S, bool isLeak = false) const {
    return llvm::make_unique<UnixStreamBugVisitor>(S, isLeak);
  }

  /// Process open/creat/fdopen.
  void checkPostCall(const CallEvent &Call, CheckerContext &C) const;

  /// Process close/fclose/dup.
  void checkPreCall(const CallEvent &Call, CheckerContext &C) const;

  /// Collect leaked streams among the dead symbols and stop tracking them.
  void checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &C) const;

  /// Handle a tracked stream value being bound to a location.
  void checkBind(SVal location, SVal val, const Stmt *S,
                 CheckerContext &C) const;

  /// Run the dead-initializer pass over a code body.
  void checkASTCodeBody(const Decl *D, AnalysisManager &mgr,
                        BugReporter &BR) const;
};

} // end anonymous namespace

/// The checker keeps two maps in the ProgramState:
///  - StreamMap:    tracked stream symbol -> its StreamState.
///  - StreamValMap: tracked stream symbol -> the location value it was last
///                  bound to (written in checkBind, read in reportLeaks).
REGISTER_MAP_WITH_PROGRAMSTATE(StreamMap, SymbolRef, StreamState)
REGISTER_MAP_WITH_PROGRAMSTATE(StreamValMap, SymbolRef, SVal)

/// Produces a "File is opened" / "File is closed" event when the tracked
/// symbol changes to that state between \p PrevN and \p N at a call
/// statement; returns null for every other node.
PathDiagnosticPiece *UnixStreamBugVisitor::VisitNode(const ExplodedNode *N,
                                                     const ExplodedNode *PrevN,
                                                     BugReporterContext &BRC,
                                                     BugReport &BR) {
  ProgramStateRef CurState = N->getState();
  ProgramStateRef PrevState = PrevN->getState();

  const StreamState *CurSS = CurState->get<StreamMap>(Sym);
  const StreamState *PrevSS = PrevState->get<StreamMap>(Sym);
  if (!CurSS)
    return nullptr;

  // Find the statement associated with this node's program point.
  const Stmt *S = nullptr;
  ProgramPoint ProgLoc = N->getLocation();
  if (Optional<StmtPoint> SP = ProgLoc.getAs<StmtPoint>()) {
    S = SP->getStmt();
  } else if (Optional<CallExitEnd> Exit = ProgLoc.getAs<CallExitEnd>()) {
    S = Exit->getCalleeContext()->getCallSite();
  } else if (Optional<BlockEdge> Edge = ProgLoc.getAs<BlockEdge>()) {
    // An assumption made on a branch shows up as a block edge; use the
    // terminator of the source block.
    const CFGBlock *SrcBlock = Edge->getSrc();
    S = SrcBlock->getTerminator();
  }
  if (!S)
    return nullptr;

  // Pick the note text and the stack hint for the detected transition.
  const char *Msg;
  const char *Hint;
  if (isOpened(CurSS, PrevSS, S)) {
    Msg = "File is opened";
    Hint = "Opened File";
  } else if (isClosed(CurSS, PrevSS, S)) {
    Msg = "File is closed";
    Hint = "Closed File";
  } else {
    return nullptr;
  }
  StackHintGeneratorForSymbol *StackHint =
      new StackHintGeneratorForSymbol(Sym, Hint);

  // Generate the extra diagnostic.
  PathDiagnosticLocation Pos(S, *(N->getSourceManager()),
                             N->getLocationContext());
  return new PathDiagnosticEventPiece(Pos, Msg, N->getSourceManager(), true,
                                      StackHint);
}

/// Starts with all cached identifiers unset and creates the two bug types.
UnixStreamChecker::UnixStreamChecker()
    : IIopen(nullptr), IIcreat(nullptr), II_fdopen(nullptr), II_dup(nullptr),
      IIfclose(nullptr), IIclose(nullptr) {
  DoubleCloseBugType.reset(
      new BugType(this, "Double close", "Unix Stream API Error"));

  LeakBugType.reset(
      new BugType(this, "Resource Leak", "Unix Stream API Error"));
  // Leak reports are suppressed on paths that end in a sink (e.g. calls to
  // assert() or exit(0)); those are the more important issue.
  LeakBugType->setSuppressOnSink(true);
}

void UnixStreamChecker::checkASTCodeBody(const Decl *D, AnalysisManager &mgr,
                                         BugReporter &BR) const {
  if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D))
    if (FD->isTemplateInstantiation())
      return;

  if (LiveVariables *L = mgr.getAnalysis<LiveVariables>(D)) {
    CFG &cfg = *mgr.getCFG(D);
    AnalysisDeclContext *AC = mgr.getAnalysisDeclContext(D);
    ParentMap &pmap = mgr.getParentMap(D);
    FindEscaped FS;
    cfg.VisitBlockStmts(FS);
    DeadStoreObs A(cfg, BR.getContext(), BR, AC, pmap, FS.Escaped);
    L->runOnAllBlocks(A);
  }
}

void UnixStreamChecker::checkPostCall(const CallEvent &Call,
                                      CheckerContext &C) const {
  initIdentifierInfo(C.getASTContext());
  ProgramStateRef State = C.getState();

  if (!Call.isGlobalCFunction())
    return;

  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
  if (!FD)
    return;

  if (Call.getCalleeIdentifier() != IIopen && FD->getNameAsString() != "open" &&
      Call.getCalleeIdentifier() != IIcreat &&
      FD->getNameAsString() != "creat" &&
      Call.getCalleeIdentifier() != II_fdopen &&
      FD->getNameAsString() != "fdopen")
    return;

  if (Call.getCalleeIdentifier() == II_fdopen ||
      FD->getNameAsString() == "fdopen") {
    // Get the symbolic value corresponding to the file handle.
    SymbolRef FileID = Call.getArgSVal(0).getAsSymbol();
    if (!FileID)
      return;

    const StreamState *SS = State->get<StreamMap>(FileID);
    if (!(SS && SS->isOpened()))
      return;
    State = State->set<StreamMap>(FileID, StreamState::getClosed());
    C.addTransition(State);
  }
  // Get the symbolic value corresponding to the file handle.
  SymbolRef FileID = Call.getReturnValue().getAsSymbol();
  if (!FileID)
    return;
  // Generate the next transition (an edge in the exploded graph).
  State = State->set<StreamMap>(FileID, StreamState::getOpened());
  C.addTransition(State);
}

void UnixStreamChecker::checkPreCall(const CallEvent &Call,
                                     CheckerContext &C) const {
  initIdentifierInfo(C.getASTContext());

  if (!Call.isGlobalCFunction())
    return;

  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
  if (!FD)
    return;

  if (Call.getCalleeIdentifier() != IIclose &&
      FD->getNameAsString() != "close" &&
      Call.getCalleeIdentifier() != IIfclose &&
      FD->getNameAsString() != "fclose" &&
      Call.getCalleeIdentifier() != II_dup && FD->getNameAsString() != "dup")
    return;

  if (Call.getNumArgs() < 1)
    return;

  if (Call.getCalleeIdentifier() == II_dup || FD->getNameAsString() == "dup") {
    // TODO: Handle dup mark symbol as escpaed for now and stop tracking.
    // This is not handled in Bada Potential Checker as well. Handle this in
    // enhancements
    ProgramStateRef State = C.getState();
    SymbolRef FileID = Call.getArgSVal(0).getAsSymbol();
    if (!FileID)
      return;
    State = State->set<StreamMap>(FileID, StreamState::getEscaped());
    C.addTransition(State);
    return;
  }
  // Get the symbolic value corresponding to the file handle.
  SymbolRef FileID = Call.getArgSVal(0).getAsSymbol();
  if (!FileID)
    return;

  // Check if the stream has already been closed.
  ProgramStateRef State = C.getState();
  const StreamState *SS = State->get<StreamMap>(FileID);
  if (SS && SS->isClosed()) {
    reportDoubleClose(FileID, Call, C);
    return;
  }

  // Generate the next transition, in which the stream is closed.
  State = State->set<StreamMap>(FileID, StreamState::getClosed());
  C.addTransition(State);
}

/// Returns true when \p Sym is dead, is still in the opened state, and is
/// not known to be null in \p State.
static bool isLeaked(SymbolRef Sym, const StreamState &SS, bool IsSymDead,
                     ProgramStateRef State) {
  if (!IsSymDead || !SS.isOpened())
    return false;

  // A symbol constrained to null means the open failed on this path, so
  // there is nothing to leak; anything else counts as a leak.
  ConditionTruthVal OpenFailed =
      State->getConstraintManager().isNull(State, Sym);
  return !OpenFailed.isConstrainedTrue();
}

// Check dead symbols and collect the leaked streams
/// Collects the leaked streams among the dead symbols, stops tracking every
/// dead symbol and reports the leaks.
///
/// Dead symbols are removed from both StreamMap and StreamValMap so that no
/// stale per-symbol data stays in the state. The leak reports are built from
/// the state held by \p C, which is not affected by these removals.
void UnixStreamChecker::checkDeadSymbols(SymbolReaper &SymReaper,
                                         CheckerContext &C) const {
  ProgramStateRef State = C.getState();
  SymbolVector LeakedStreams;
  StreamMapTy TrackedStreams = State->get<StreamMap>();
  for (StreamMapTy::iterator I = TrackedStreams.begin(),
                             E = TrackedStreams.end();
       I != E; ++I) {
    SymbolRef Sym = I->first;
    if (!SymReaper.isDead(Sym))
      continue;

    // Collect leaked symbols.
    if (isLeaked(Sym, I->second, /*IsSymDead=*/true, State))
      LeakedStreams.push_back(Sym);

    // Remove the dead symbol from both per-symbol maps.
    State = State->remove<StreamMap>(Sym);
    State = State->remove<StreamValMap>(Sym);
  }

  // Publish the cleaned-up state.
  ExplodedNode *N = C.addTransition(State);
  // Without a node there is nothing to attach a report to.
  if (!N)
    return;
  reportLeaks(LeakedStreams, C, N);
}

/// Emits a double-close report for \p FileIDSym, highlighting the source
/// range of \p Call. Nothing is emitted when no node can be created.
void UnixStreamChecker::reportDoubleClose(SymbolRef FileIDSym,
                                          const CallEvent &Call,
                                          CheckerContext &C) const {
  ExplodedNode *ErrNode = C.addTransition(C.getState());
  // If we've already reached this node on another path, return.
  if (!ErrNode)
    return;

  // Generate the report.
  auto Report = llvm::make_unique<BugReport>(
      *DoubleCloseBugType, "Closing a previously closed file stream", ErrNode);
  Report->addRange(Call.getSourceRange());
  Report->markInteresting(FileIDSym);
  C.emitReport(std::move(Report));
}

/// Emits one leak report at \p ErrNode for each symbol in \p LeakedStreams
/// that has a region recorded in StreamValMap.
///
/// Symbols that are null, have no StreamValMap entry, or whose recorded
/// value is not a region are skipped individually; one such symbol does not
/// suppress the reports for the remaining symbols. When the recorded region
/// is a variable, its name is included in the message.
void UnixStreamChecker::reportLeaks(SymbolVector LeakedStreams,
                                    CheckerContext &C,
                                    ExplodedNode *ErrNode) const {
  // A report cannot be attached to a missing node.
  if (!ErrNode)
    return;

  ProgramStateRef State = C.getState();
  for (SymbolVector::iterator I = LeakedStreams.begin(),
                              E = LeakedStreams.end();
       I != E; ++I) {
    SymbolRef SR = *I;
    if (!SR)
      continue;

    const SVal *V = State->get<StreamValMap>(SR);
    if (!V)
      continue;

    const MemRegion *BoundReg = V->getAsRegion();
    if (!BoundReg)
      continue;

    const VarRegion *VR = dyn_cast<VarRegion>(BoundReg);
    const VarDecl *VD = VR ? VR->getDecl() : nullptr;

    SmallString<200> buf;
    llvm::raw_svector_ostream os(buf);
    os << "Opened file is never closed; potential resource leak";
    if (VD) {
      os << " of resource pointed to by '";
      os << VD->getName();
      os << '\'';
    }

    auto R = llvm::make_unique<BugReport>(*LeakBugType, os.str(), ErrNode);
    R->markInteresting(SR);
    C.emitReport(std::move(R));
  }
}

/// Handles a bind of an opened stream symbol.
///
/// When the destination is a variable recorded in VDMap, a leak report is
/// emitted right away and the VDMap entry is erased. Otherwise the
/// destination is remembered in StreamValMap for the symbol.
void UnixStreamChecker::checkBind(SVal location, SVal val, const Stmt *S,
                                  CheckerContext &C) const {
  SymbolRef FileID = val.getAsSymbol();
  ProgramStateRef State = C.getState();

  // Only symbols currently tracked as opened are of interest.
  const StreamState *SS = State->get<StreamMap>(FileID);
  if (!SS || !SS->isOpened())
    return;

  const VarRegion *DestVar =
      dyn_cast_or_null<VarRegion>(location.getAsRegion());
  if (DestVar) {
    const VarDecl *VD = DestVar->getDecl();
    llvm::DenseMap<const VarDecl *, bool>::iterator DeadVar = VDMap.find(VD);
    if (DeadVar != VDMap.end() && DeadVar->second) {
      ExplodedNode *ErrNode = C.addTransition(State);

      SmallString<200> Text;
      llvm::raw_svector_ostream Out(Text);
      Out << "Opened file is never closed; potential resource leak";
      Out << " of resource pointed to by '";
      Out << VD->getName();
      Out << '\'';

      auto Report =
          llvm::make_unique<BugReport>(*LeakBugType, Out.str(), ErrNode);
      Report->markInteresting(FileID);
      C.emitReport(std::move(Report));

      // Report each such variable only once.
      VDMap.erase(DeadVar);
      return;
    }
  }

  // Remember where the stream value now lives.
  State = State->set<StreamValMap>(FileID, location);
  C.addTransition(State);
}

/// Looks up the identifiers of the recognised functions in \p Ctx the first
/// time it is called; later calls return immediately (IIopen serves as the
/// "already initialised" marker).
void UnixStreamChecker::initIdentifierInfo(ASTContext &Ctx) const {
  if (IIopen)
    return;

  IdentifierTable &Idents = Ctx.Idents;

  // Functions that produce a stream.
  IIopen = &Idents.get("open");
  IIcreat = &Idents.get("creat");
  II_fdopen = &Idents.get("fdopen");

  // Functions that consume or duplicate a stream.
  II_dup = &Idents.get("dup");
  IIfclose = &Idents.get("fclose");
  IIclose = &Idents.get("close");
}

/// Registers UnixStreamChecker with the given checker manager.
void ento::registerUnixStreamChecker(CheckerManager &mgr) {
  mgr.registerChecker<UnixStreamChecker>();
}
