pepperfish_profiler.lua 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616
  1. --[[
  2. == Introduction ==
  3. Note that this requires os.clock(), debug.sethook(),
  4. and debug.getinfo() or your equivalent replacements to
  5. be available if this is an embedded application.
  6. Example usage:
  7. profiler = newProfiler()
  8. profiler:start()
  9. < call some functions that take time >
  10. profiler:stop()
  11. local outfile = io.open( "profile.txt", "w+" )
  12. profiler:report( outfile )
  13. outfile:close()
  14. == Optionally choosing profiling method ==
  15. The rest of this comment can be ignored if you merely want a good profiler.
  16. newProfiler(method, sampledelay):
  17. If method is omitted or "time", will profile based on real performance.
  18. optionally, frequency can be provided to control the number of opcodes
  19. per profiling tick. By default this is 100000, which (on my system) provides
  20. one tick approximately every 2ms and reduces system performance by about 10%.
  21. This can be reduced to increase accuracy at the cost of performance, or
  22. increased for the opposite effect.
  23. If method is "call", will profile based on function calls. Frequency is
  24. ignored.
  25. "time" may bias profiling somewhat towards large areas with "simple opcodes",
  26. as the profiling function (which introduces a certain amount of unavoidable
  27. overhead) will be called more often. This can be minimized by using a larger
  28. sample delay - the default should leave any error largely overshadowed by
  29. statistical noise. With a delay of 1000 I was able to achieve inaccuray of
  30. approximately 25%. Increasing the delay to 100000 left inaccuracy below my
  31. testing error.
  32. "call" may bias profiling heavily towards areas with many function calls.
  33. Testing found a degenerate case giving a figure inaccurate by approximately
  34. 20,000%. (Yes, a multiple of 200.) This is, however, more directly comparable
  35. to common profilers (such as gprof) and also gives accurate function call
  36. counts, which cannot be retrieved from "time".
  37. I strongly recommend "time" mode, and it is now the default.
  38. == History ==
  39. 2008-09-16 - Time-based profiling and conversion to Lua 5.1
  40. by Ben Wilhelm ( [email protected] ).
  41. Added the ability to optionally choose profiling methods, along with a new
  42. profiling method.
  43. Converted to Lua 5, a few improvements, and
  44. additional documentation by Tom Spilman ( [email protected] )
  45. Additional corrections and tidying by original author
  46. Daniel Silverstone ( [email protected] )
  47. == Status ==
  48. Daniel Silverstone is no longer using this code, and judging by how long it's
  49. been waiting for Lua 5.1 support, I don't think Tom Spilman is either. I'm
  50. perfectly willing to take on maintenance, so if you have problems or
  51. questions, go ahead and email me :)
  52. -- Ben Wilhelm ( [email protected] ) '
  53. == Copyright ==
  54. Lua profiler - Copyright Pepperfish 2002,2003,2004
  55. Permission is hereby granted, free of charge, to any person obtaining a copy
  56. of this software and associated documentation files (the "Software"), to
  57. deal in the Software without restriction, including without limitation the
  58. rights to use, copy, modify, merge, publish, distribute, and/or sell copies
  59. of the Software, and to permit persons to whom the Software is furnished to
  60. do so, subject to the following conditions:
  61. The above copyright notice and this permission notice shall be included in
  62. all copies or substantial portions of the Software.
  63. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  64. IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  65. FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  66. AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  67. LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  68. FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  69. IN THE SOFTWARE.
  70. --]]
  71. --
  72. -- All profiler related stuff is stored in the top level table '_profiler'
  73. --
  74. _profiler = {}
  75. --
  76. -- newProfiler() creates a new profiler object for managing
  77. -- the profiler and storing state. Note that only one profiler
  78. -- object can be executing at one time.
  79. --
  80. function newProfiler(variant, sampledelay)
  81. if _profiler.running then
  82. print("Profiler already running.")
  83. return
  84. end
  85. variant = variant or "time"
  86. if variant ~= "time" and variant ~= "call" then
  87. print("Profiler method must be 'time' or 'call'.")
  88. return
  89. end
  90. local newprof = {}
  91. for k,v in pairs(_profiler) do
  92. newprof[k] = v
  93. end
  94. newprof.variant = variant
  95. newprof.sampledelay = sampledelay or 100000
  96. return newprof
  97. end
  98. --
  99. -- This function starts the profiler. It will do nothing
  100. -- if this (or any other) profiler is already running.
  101. --
  102. function _profiler.start(self)
  103. if _profiler.running then
  104. return
  105. end
  106. -- Start the profiler. This begins by setting up internal profiler state
  107. _profiler.running = self
  108. self.rawstats = {}
  109. self.callstack = {}
  110. if self.variant == "time" then
  111. self.lastclock = os.clock()
  112. debug.sethook( _profiler_hook_wrapper_by_time, "", self.sampledelay )
  113. elseif self.variant == "call" then
  114. debug.sethook( _profiler_hook_wrapper_by_call, "cr" )
  115. else
  116. print("Profiler method must be 'time' or 'call'.")
  117. sys.exit(1)
  118. end
  119. end
  120. --
  121. -- This function stops the profiler. It will do nothing
  122. -- if a profiler is not running, and nothing if it isn't
  123. -- the currently running profiler.
  124. --
  125. function _profiler.stop(self)
  126. if _profiler.running ~= self then
  127. return
  128. end
  129. -- Stop the profiler.
  130. debug.sethook( nil )
  131. _profiler.running = nil
  132. end
  133. --
  134. -- Simple wrapper to handle the hook. You should not
  135. -- be calling this directly. Duplicated to reduce overhead.
  136. --
  137. function _profiler_hook_wrapper_by_call(action)
  138. if _profiler.running == nil then
  139. debug.sethook( nil )
  140. end
  141. _profiler.running:_internal_profile_by_call(action)
  142. end
  143. function _profiler_hook_wrapper_by_time(action)
  144. if _profiler.running == nil then
  145. debug.sethook( nil )
  146. end
  147. _profiler.running:_internal_profile_by_time(action)
  148. end
  149. --
  150. -- This is the main by-function-call function of the profiler and should not
  151. -- be called except by the hook wrapper
  152. --
  153. function _profiler._internal_profile_by_call(self,action)
  154. -- Since we can obtain the 'function' for the item we've had call us, we
  155. -- can use that...
  156. local caller_info = debug.getinfo( 3 )
  157. if caller_info == nil then
  158. print "No caller_info"
  159. return
  160. end
  161. --SHG_LOG("[_profiler._internal_profile] "..(caller_info.name or "<nil>"))
  162. -- Retrieve the most recent activation record...
  163. local latest_ar = nil
  164. if table.getn(self.callstack) > 0 then
  165. latest_ar = self.callstack[table.getn(self.callstack)]
  166. end
  167. -- Are we allowed to profile this function?
  168. local should_not_profile = 0
  169. for k,v in pairs(self.prevented_functions) do
  170. if k == caller_info.func then
  171. should_not_profile = v
  172. end
  173. end
  174. -- Also check the top activation record...
  175. if latest_ar then
  176. if latest_ar.should_not_profile == 2 then
  177. should_not_profile = 2
  178. end
  179. end
  180. -- Now then, are we in 'call' or 'return' ?
  181. -- print("Profile:", caller_info.name, "SNP:", should_not_profile,
  182. -- "Action:", action )
  183. if action == "call" then
  184. -- Making a call...
  185. local this_ar = {}
  186. this_ar.should_not_profile = should_not_profile
  187. this_ar.parent_ar = latest_ar
  188. this_ar.anon_child = 0
  189. this_ar.name_child = 0
  190. this_ar.children = {}
  191. this_ar.children_time = {}
  192. this_ar.clock_start = os.clock()
  193. -- Last thing to do on a call is to insert this onto the ar stack...
  194. table.insert( self.callstack, this_ar )
  195. else
  196. local this_ar = latest_ar
  197. if this_ar == nil then
  198. return -- No point in doing anything if no upper activation record
  199. end
  200. -- Right, calculate the time in this function...
  201. this_ar.clock_end = os.clock()
  202. this_ar.this_time = this_ar.clock_end - this_ar.clock_start
  203. -- Now, if we have a parent, update its call info...
  204. if this_ar.parent_ar then
  205. this_ar.parent_ar.children[caller_info.func] =
  206. (this_ar.parent_ar.children[caller_info.func] or 0) + 1
  207. this_ar.parent_ar.children_time[caller_info.func] =
  208. (this_ar.parent_ar.children_time[caller_info.func] or 0 ) +
  209. this_ar.this_time
  210. if caller_info.name == nil then
  211. this_ar.parent_ar.anon_child =
  212. this_ar.parent_ar.anon_child + this_ar.this_time
  213. else
  214. this_ar.parent_ar.name_child =
  215. this_ar.parent_ar.name_child + this_ar.this_time
  216. end
  217. end
  218. -- Now if we're meant to record information about ourselves, do so...
  219. if this_ar.should_not_profile == 0 then
  220. local inforec = self:_get_func_rec(caller_info.func,1)
  221. inforec.count = inforec.count + 1
  222. inforec.time = inforec.time + this_ar.this_time
  223. inforec.anon_child_time = inforec.anon_child_time + this_ar.anon_child
  224. inforec.name_child_time = inforec.name_child_time + this_ar.name_child
  225. inforec.func_info = caller_info
  226. for k,v in pairs(this_ar.children) do
  227. inforec.children[k] = (inforec.children[k] or 0) + v
  228. inforec.children_time[k] =
  229. (inforec.children_time[k] or 0) + this_ar.children_time[k]
  230. end
  231. end
  232. -- Last thing to do on return is to drop the last activation record...
  233. table.remove( self.callstack, table.getn( self.callstack ) )
  234. end
  235. end
  236. --
  237. -- This is the main by-time internal function of the profiler and should not
  238. -- be called except by the hook wrapper
  239. --
  240. function _profiler._internal_profile_by_time(self,action)
  241. -- we do this first so we add the minimum amount of extra time to this call
  242. local timetaken = os.clock() - self.lastclock
  243. local depth = 3
  244. local at_top = true
  245. local last_caller
  246. local caller = debug.getinfo(depth)
  247. while caller do
  248. if not caller.func then caller.func = "(tail call)" end
  249. if self.prevented_functions[caller.func] == nil then
  250. local info = self:_get_func_rec(caller.func, 1, caller)
  251. info.count = info.count + 1
  252. info.time = info.time + timetaken
  253. if last_caller then
  254. -- we're not the head, so update the "children" times also
  255. if last_caller.name then
  256. info.name_child_time = info.name_child_time + timetaken
  257. else
  258. info.anon_child_time = info.anon_child_time + timetaken
  259. end
  260. info.children[last_caller.func] =
  261. (info.children[last_caller.func] or 0) + 1
  262. info.children_time[last_caller.func] =
  263. (info.children_time[last_caller.func] or 0) + timetaken
  264. end
  265. end
  266. depth = depth + 1
  267. last_caller = caller
  268. caller = debug.getinfo(depth)
  269. end
  270. self.lastclock = os.clock()
  271. end
  272. --
  273. -- This returns a (possibly empty) function record for
  274. -- the specified function. It is for internal profiler use.
  275. --
  276. function _profiler._get_func_rec(self,func,force,info)
  277. -- Find the function ref for 'func' (if force and not present, create one)
  278. local ret = self.rawstats[func]
  279. if ret == nil and force ~= 1 then
  280. return nil
  281. end
  282. if ret == nil then
  283. -- Build a new function statistics table
  284. ret = {}
  285. ret.func = func
  286. ret.count = 0
  287. ret.time = 0
  288. ret.anon_child_time = 0
  289. ret.name_child_time = 0
  290. ret.children = {}
  291. ret.children_time = {}
  292. ret.func_info = info
  293. self.rawstats[func] = ret
  294. end
  295. return ret
  296. end
  297. --
  298. -- This writes a profile report to the output file object. If
  299. -- sort_by_total_time is nil or false the output is sorted by
  300. -- the function time minus the time in it's children.
  301. --
  302. function _profiler.report( self, outfile, sort_by_total_time )
  303. outfile:write
  304. [[Lua Profile output created by profiler.lua. Copyright Pepperfish 2002+
  305. ]]
  306. -- This is pretty awful.
  307. local terms = {}
  308. if self.variant == "time" then
  309. terms.capitalized = "Sample"
  310. terms.single = "sample"
  311. terms.pastverb = "sampled"
  312. elseif self.variant == "call" then
  313. terms.capitalized = "Call"
  314. terms.single = "call"
  315. terms.pastverb = "called"
  316. else
  317. assert(false)
  318. end
  319. local total_time = 0
  320. local ordering = {}
  321. for func,record in pairs(self.rawstats) do
  322. table.insert(ordering, func)
  323. end
  324. if sort_by_total_time then
  325. table.sort( ordering,
  326. function(a,b) return self.rawstats[a].time > self.rawstats[b].time end
  327. )
  328. else
  329. table.sort( ordering,
  330. function(a,b)
  331. local arec = self.rawstats[a]
  332. local brec = self.rawstats[b]
  333. local atime = arec.time - (arec.anon_child_time + arec.name_child_time)
  334. local btime = brec.time - (brec.anon_child_time + brec.name_child_time)
  335. return atime > btime
  336. end
  337. )
  338. end
  339. for i=1,table.getn(ordering) do
  340. local func = ordering[i]
  341. local record = self.rawstats[func]
  342. local thisfuncname = " " .. self:_pretty_name(func) .. " "
  343. if string.len( thisfuncname ) < 42 then
  344. thisfuncname =
  345. string.rep( "-", (42 - string.len(thisfuncname))/2 ) .. thisfuncname
  346. thisfuncname =
  347. thisfuncname .. string.rep( "-", 42 - string.len(thisfuncname) )
  348. end
  349. total_time = total_time + ( record.time - ( record.anon_child_time +
  350. record.name_child_time ) )
  351. outfile:write( string.rep( "-", 19 ) .. thisfuncname ..
  352. string.rep( "-", 19 ) .. "\n" )
  353. outfile:write( terms.capitalized.." count: " ..
  354. string.format( "%4d", record.count ) .. "\n" )
  355. outfile:write( "Time spend total: " ..
  356. string.format( "%4.3f", record.time ) .. "s\n" )
  357. outfile:write( "Time spent in children: " ..
  358. string.format("%4.3f",record.anon_child_time+record.name_child_time) ..
  359. "s\n" )
  360. local timeinself =
  361. record.time - (record.anon_child_time + record.name_child_time)
  362. outfile:write( "Time spent in self: " ..
  363. string.format("%4.3f", timeinself) .. "s\n" )
  364. outfile:write( "Time spent per " .. terms.single .. ": " ..
  365. string.format("%4.5f", record.time/record.count) ..
  366. "s/" .. terms.single .. "\n" )
  367. outfile:write( "Time spent in self per "..terms.single..": " ..
  368. string.format( "%4.5f", timeinself/record.count ) .. "s/" ..
  369. terms.single.."\n" )
  370. -- Report on each child in the form
  371. -- Child <funcname> called n times and took a.bs
  372. local added_blank = 0
  373. for k,v in pairs(record.children) do
  374. if self.prevented_functions[k] == nil or
  375. self.prevented_functions[k] == 0
  376. then
  377. if added_blank == 0 then
  378. outfile:write( "\n" ) -- extra separation line
  379. added_blank = 1
  380. end
  381. outfile:write( "Child " .. self:_pretty_name(k) ..
  382. string.rep( " ", 41-string.len(self:_pretty_name(k)) ) .. " " ..
  383. terms.pastverb.." " .. string.format("%6d", v) )
  384. outfile:write( " times. Took " ..
  385. string.format("%4.2f", record.children_time[k] ) .. "s\n" )
  386. end
  387. end
  388. outfile:write( "\n" ) -- extra separation line
  389. outfile:flush()
  390. end
  391. outfile:write( "\n\n" )
  392. outfile:write( "Total time spent in profiled functions: " ..
  393. string.format("%5.3g",total_time) .. "s\n" )
  394. outfile:write( [[
  395. END
  396. ]] )
  397. outfile:flush()
  398. end
  399. --
  400. -- This writes the profile to the output file object as
  401. -- loadable Lua source.
  402. --
  403. function _profiler.lua_report(self,outfile)
  404. -- Purpose: Write out the entire raw state in a cross-referenceable form.
  405. local ordering = {}
  406. local functonum = {}
  407. for func,record in pairs(self.rawstats) do
  408. table.insert(ordering, func)
  409. functonum[func] = table.getn(ordering)
  410. end
  411. outfile:write(
  412. "-- Profile generated by profiler.lua Copyright Pepperfish 2002+\n\n" )
  413. outfile:write( "-- Function names\nfuncnames = {}\n" )
  414. for i=1,table.getn(ordering) do
  415. local thisfunc = ordering[i]
  416. outfile:write( "funcnames[" .. i .. "] = " ..
  417. string.format("%q", self:_pretty_name(thisfunc)) .. "\n" )
  418. end
  419. outfile:write( "\n" )
  420. outfile:write( "-- Function times\nfunctimes = {}\n" )
  421. for i=1,table.getn(ordering) do
  422. local thisfunc = ordering[i]
  423. local record = self.rawstats[thisfunc]
  424. outfile:write( "functimes[" .. i .. "] = { " )
  425. outfile:write( "tot=" .. record.time .. ", " )
  426. outfile:write( "achild=" .. record.anon_child_time .. ", " )
  427. outfile:write( "nchild=" .. record.name_child_time .. ", " )
  428. outfile:write( "count=" .. record.count .. " }\n" )
  429. end
  430. outfile:write( "\n" )
  431. outfile:write( "-- Child links\nchildren = {}\n" )
  432. for i=1,table.getn(ordering) do
  433. local thisfunc = ordering[i]
  434. local record = self.rawstats[thisfunc]
  435. outfile:write( "children[" .. i .. "] = { " )
  436. for k,v in pairs(record.children) do
  437. if functonum[k] then -- non-recorded functions will be ignored now
  438. outfile:write( functonum[k] .. ", " )
  439. end
  440. end
  441. outfile:write( "}\n" )
  442. end
  443. outfile:write( "\n" )
  444. outfile:write( "-- Child call counts\nchildcounts = {}\n" )
  445. for i=1,table.getn(ordering) do
  446. local thisfunc = ordering[i]
  447. local record = self.rawstats[thisfunc]
  448. outfile:write( "children[" .. i .. "] = { " )
  449. for k,v in record.children do
  450. if functonum[k] then -- non-recorded functions will be ignored now
  451. outfile:write( v .. ", " )
  452. end
  453. end
  454. outfile:write( "}\n" )
  455. end
  456. outfile:write( "\n" )
  457. outfile:write( "-- Child call time\nchildtimes = {}\n" )
  458. for i=1,table.getn(ordering) do
  459. local thisfunc = ordering[i]
  460. local record = self.rawstats[thisfunc];
  461. outfile:write( "children[" .. i .. "] = { " )
  462. for k,v in pairs(record.children) do
  463. if functonum[k] then -- non-recorded functions will be ignored now
  464. outfile:write( record.children_time[k] .. ", " )
  465. end
  466. end
  467. outfile:write( "}\n" )
  468. end
  469. outfile:write( "\n\n-- That is all.\n\n" )
  470. outfile:flush()
  471. end
  472. -- Internal function to calculate a pretty name for the profile output
  473. function _profiler._pretty_name(self,func)
  474. -- Only the data collected during the actual
  475. -- run seems to be correct.... why?
  476. local info = self.rawstats[ func ].func_info
  477. -- local info = debug.getinfo( func )
  478. local name = ""
  479. if info.what == "Lua" then
  480. name = "L:"
  481. end
  482. if info.what == "C" then
  483. name = "C:"
  484. end
  485. if info.what == "main" then
  486. name = " :"
  487. end
  488. if info.name == nil then
  489. name = name .. "<"..tostring(func) .. ">"
  490. else
  491. name = name .. info.name
  492. end
  493. if info.source then
  494. name = name .. "@" .. info.source
  495. else
  496. if info.what == "C" then
  497. name = name .. "@?"
  498. else
  499. name = name .. "@<string>"
  500. end
  501. end
  502. name = name .. ":"
  503. if info.what == "C" then
  504. name = name .. "?"
  505. else
  506. name = name .. info.linedefined
  507. end
  508. return name
  509. end
  510. --
  511. -- This allows you to specify functions which you do
  512. -- not want profiled. Setting level to 1 keeps the
  513. -- function from being profiled. Setting level to 2
  514. -- keeps both the function and its children from
  515. -- being profiled.
  516. --
  517. -- BUG: 2 will probably act exactly like 1 in "time" mode.
  518. -- If anyone cares, let me (zorba) know and it can be fixed.
  519. --
  520. function _profiler.prevent(self, func, level)
  521. self.prevented_functions[func] = (level or 1)
  522. end
  523. _profiler.prevented_functions = {
  524. [_profiler.start] = 2,
  525. [_profiler.stop] = 2,
  526. [_profiler._internal_profile_by_time] = 2,
  527. [_profiler._internal_profile_by_call] = 2,
  528. [_profiler_hook_wrapper_by_time] = 2,
  529. [_profiler_hook_wrapper_by_call] = 2,
  530. [_profiler.prevent] = 2,
  531. [_profiler._get_func_rec] = 2,
  532. [_profiler.report] = 2,
  533. [_profiler.lua_report] = 2,
  534. [_profiler._pretty_name] = 2
  535. }