1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
| //== MIGChecker.cpp - MIG calling convention checker ------------*- C++ -*--==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines MIGChecker, a Mach Interface Generator calling convention
// checker. Namely, in MIG callback implementation the following rules apply:
// - When a server routine returns an error code that represents success, it
// must take ownership of resources passed to it (and eventually release
// them).
// - Additionally, when returning success, all out-parameters must be
// initialized.
// - When it returns any other error code, it must not take ownership,
// because the message and its out-of-line parameters will be destroyed
// by the client that called the function.
// For now we only check the last rule, as its violations lead to dangerous
// use-after-free exploits.
//
//===----------------------------------------------------------------------===//
#include "clang/Analysis/AnyCall.h"
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
using namespace clang;
using namespace ento;
namespace {
class MIGChecker : public Checker<check::PostCall, check::PreStmt<ReturnStmt>,
check::EndFunction> {
BugType BT{this, "Use-after-free (MIG calling convention violation)",
categories::MemoryError};
// The checker knows that an out-of-line object is deallocated if it is
// passed as an argument to one of these functions. If this object is
// additionally an argument of a MIG routine, the checker keeps track of that
// information and issues a warning when an error is returned from the
// respective routine.
std::vector<std::pair<CallDescription, unsigned>> Deallocators = {
#define CALL(required_args, deallocated_arg, ...) \
{{{__VA_ARGS__}, required_args}, deallocated_arg}
// E.g., if the checker sees a C function 'vm_deallocate' that is
// defined on class 'IOUserClient' that has exactly 3 parameters, it knows
// that argument #1 (starting from 0, i.e. the second argument) is going
// to be consumed in the sense of the MIG consume-on-success convention.
CALL(3, 1, "vm_deallocate"),
CALL(3, 1, "mach_vm_deallocate"),
CALL(2, 0, "mig_deallocate"),
CALL(2, 1, "mach_port_deallocate"),
CALL(1, 0, "device_deallocate"),
CALL(1, 0, "iokit_remove_connect_reference"),
CALL(1, 0, "iokit_remove_reference"),
CALL(1, 0, "iokit_release_port"),
CALL(1, 0, "ipc_port_release"),
CALL(1, 0, "ipc_port_release_sonce"),
CALL(1, 0, "ipc_voucher_attr_control_release"),
CALL(1, 0, "ipc_voucher_release"),
CALL(1, 0, "lock_set_dereference"),
CALL(1, 0, "memory_object_control_deallocate"),
CALL(1, 0, "pset_deallocate"),
CALL(1, 0, "semaphore_dereference"),
CALL(1, 0, "space_deallocate"),
CALL(1, 0, "space_inspect_deallocate"),
CALL(1, 0, "task_deallocate"),
CALL(1, 0, "task_inspect_deallocate"),
CALL(1, 0, "task_name_deallocate"),
CALL(1, 0, "thread_deallocate"),
CALL(1, 0, "thread_inspect_deallocate"),
CALL(1, 0, "upl_deallocate"),
CALL(1, 0, "vm_map_deallocate"),
// E.g., if the checker sees a method 'releaseAsyncReference64()' that is
// defined on class 'IOUserClient' that takes exactly 1 argument, it knows
// that the argument is going to be consumed in the sense of the MIG
// consume-on-success convention.
CALL(1, 0, "IOUserClient", "releaseAsyncReference64"),
CALL(1, 0, "IOUserClient", "releaseNotificationPort"),
#undef CALL
};
CallDescription OsRefRetain{"os_ref_retain", 1};
void checkReturnAux(const ReturnStmt *RS, CheckerContext &C) const;
public:
void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
// HACK: We're making two attempts to find the bug: checkEndFunction
// should normally be enough but it fails when the return value is a literal
// that never gets put into the Environment and ends of function with multiple
// returns get agglutinated across returns, preventing us from obtaining
// the return value. The problem is similar to https://reviews.llvm.org/D25326
// but now we step into it in the top-level function.
void checkPreStmt(const ReturnStmt *RS, CheckerContext &C) const {
checkReturnAux(RS, C);
}
void checkEndFunction(const ReturnStmt *RS, CheckerContext &C) const {
checkReturnAux(RS, C);
}
};
} // end anonymous namespace
// A flag that says that the programmer has called a MIG destructor
// for at least one parameter.
REGISTER_TRAIT_WITH_PROGRAMSTATE(ReleasedParameter, bool)
// A set of parameters for which the check is suppressed because
// reference counting is being performed.
REGISTER_SET_WITH_PROGRAMSTATE(RefCountedParameters, const ParmVarDecl *)
static const ParmVarDecl *getOriginParam(SVal V, CheckerContext &C,
bool IncludeBaseRegions = false) {
// TODO: We should most likely always include base regions here.
SymbolRef Sym = V.getAsSymbol(IncludeBaseRegions);
if (!Sym)
return nullptr;
// If we optimistically assume that the MIG routine never re-uses the storage
// that was passed to it as arguments when it invalidates it (but at most when
// it assigns to parameter variables directly), this procedure correctly
// determines if the value was loaded from the transitive closure of MIG
// routine arguments in the heap.
while (const MemRegion *MR = Sym->getOriginRegion()) {
const auto *VR = dyn_cast<VarRegion>(MR);
if (VR && VR->hasStackParametersStorage() &&
VR->getStackFrame()->inTopFrame())
return cast<ParmVarDecl>(VR->getDecl());
const SymbolicRegion *SR = MR->getSymbolicBase();
if (!SR)
return nullptr;
Sym = SR->getSymbol();
}
return nullptr;
}
static bool isInMIGCall(CheckerContext &C) {
const LocationContext *LC = C.getLocationContext();
assert(LC && "Unknown location context");
const StackFrameContext *SFC;
// Find the top frame.
while (LC) {
SFC = LC->getStackFrame();
LC = SFC->getParent();
}
const Decl *D = SFC->getDecl();
if (Optional<AnyCall> AC = AnyCall::forDecl(D)) {
// Even though there's a Sema warning when the return type of an annotated
// function is not a kern_return_t, this warning isn't an error, so we need
// an extra sanity check here.
// FIXME: AnyCall doesn't support blocks yet, so they remain unchecked
// for now.
if (!AC->getReturnType(C.getASTContext())
.getCanonicalType()->isSignedIntegerType())
return false;
}
if (D->hasAttr<MIGServerRoutineAttr>())
return true;
// See if there's an annotated method in the superclass.
if (const auto *MD = dyn_cast<CXXMethodDecl>(D))
for (const auto *OMD: MD->overridden_methods())
if (OMD->hasAttr<MIGServerRoutineAttr>())
return true;
return false;
}
void MIGChecker::checkPostCall(const CallEvent &Call, CheckerContext &C) const {
if (Call.isCalled(OsRefRetain)) {
// If the code is doing reference counting over the parameter,
// it opens up an opportunity for safely calling a destructor function.
// TODO: We should still check for over-releases.
if (const ParmVarDecl *PVD =
getOriginParam(Call.getArgSVal(0), C, /*IncludeBaseRegions=*/true)) {
// We never need to clean up the program state because these are
// top-level parameters anyway, so they're always live.
C.addTransition(C.getState()->add<RefCountedParameters>(PVD));
}
return;
}
if (!isInMIGCall(C))
return;
auto I = llvm::find_if(Deallocators,
[&](const std::pair<CallDescription, unsigned> &Item) {
return Call.isCalled(Item.first);
});
if (I == Deallocators.end())
return;
ProgramStateRef State = C.getState();
unsigned ArgIdx = I->second;
SVal Arg = Call.getArgSVal(ArgIdx);
const ParmVarDecl *PVD = getOriginParam(Arg, C);
if (!PVD || State->contains<RefCountedParameters>(PVD))
return;
const NoteTag *T = C.getNoteTag([this, PVD](BugReport &BR) -> std::string {
if (&BR.getBugType() != &BT)
return "";
SmallString<64> Str;
llvm::raw_svector_ostream OS(Str);
OS << "Value passed through parameter '" << PVD->getName()
<< "\' is deallocated";
return OS.str();
});
C.addTransition(State->set<ReleasedParameter>(true), T);
}
// Returns true if V can potentially represent a "successful" kern_return_t.
static bool mayBeSuccess(SVal V, CheckerContext &C) {
ProgramStateRef State = C.getState();
// Can V represent KERN_SUCCESS?
if (!State->isNull(V).isConstrainedFalse())
return true;
SValBuilder &SVB = C.getSValBuilder();
ASTContext &ACtx = C.getASTContext();
// Can V represent MIG_NO_REPLY?
static const int MigNoReply = -305;
V = SVB.evalEQ(C.getState(), V, SVB.makeIntVal(MigNoReply, ACtx.IntTy));
if (!State->isNull(V).isConstrainedTrue())
return true;
// If none of the above, it's definitely an error.
return false;
}
void MIGChecker::checkReturnAux(const ReturnStmt *RS, CheckerContext &C) const {
// It is very unlikely that a MIG callback will be called from anywhere
// within the project under analysis and the caller isn't itself a routine
// that follows the MIG calling convention. Therefore we're safe to believe
// that it's always the top frame that is of interest. There's a slight chance
// that the user would want to enforce the MIG calling convention upon
// a random routine in the middle of nowhere, but given that the convention is
// fairly weird and hard to follow in the first place, there's relatively
// little motivation to spread it this way.
if (!C.inTopFrame())
return;
if (!isInMIGCall(C))
return;
// We know that the function is non-void, but what if the return statement
// is not there in the code? It's not a compile error, we should not crash.
if (!RS)
return;
ProgramStateRef State = C.getState();
if (!State->get<ReleasedParameter>())
return;
SVal V = C.getSVal(RS);
if (mayBeSuccess(V, C))
return;
ExplodedNode *N = C.generateErrorNode();
if (!N)
return;
auto R = std::make_unique<PathSensitiveBugReport>(
BT,
"MIG callback fails with error after deallocating argument value. "
"This is a use-after-free vulnerability because the caller will try to "
"deallocate it again",
N);
R->addRange(RS->getSourceRange());
bugreporter::trackExpressionValue(N, RS->getRetValue(), *R,
bugreporter::TrackingKind::Thorough, false);
C.emitReport(std::move(R));
}
void ento::registerMIGChecker(CheckerManager &Mgr) {
Mgr.registerChecker<MIGChecker>();
}
bool ento::shouldRegisterMIGChecker(const LangOptions &LO) {
return true;
}
|