pepperfish_profiler.lua 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629
  1. --[[
  2. == Introduction ==
  3. Note that this requires os.clock(), debug.sethook(),
  4. and debug.getinfo() or your equivalent replacements to
  5. be available if this is an embedded application.
  6. Example usage:
  7. profiler = newProfiler()
  8. profiler:start()
  9. < call some functions that take time >
  10. profiler:stop()
  11. local outfile = io.open( "profile.txt", "w+" )
  12. profiler:report( outfile )
  13. outfile:close()
  14. == Optionally choosing profiling method ==
  15. The rest of this comment can be ignored if you merely want a good profiler.
  16. newProfiler(method, sampledelay):
  17. If method is omitted or "time", will profile based on real performance.
  18. optionally, frequency can be provided to control the number of opcodes
  19. per profiling tick. By default this is 100000, which (on my system) provides
  20. one tick approximately every 2ms and reduces system performance by about 10%.
  21. This can be reduced to increase accuracy at the cost of performance, or
  22. increased for the opposite effect.
  23. If method is "call", will profile based on function calls. Frequency is
  24. ignored.
  25. "time" may bias profiling somewhat towards large areas with "simple opcodes",
  26. as the profiling function (which introduces a certain amount of unavoidable
  27. overhead) will be called more often. This can be minimized by using a larger
  28. sample delay - the default should leave any error largely overshadowed by
  29. statistical noise. With a delay of 1000 I was able to achieve inaccuray of
  30. approximately 25%. Increasing the delay to 100000 left inaccuracy below my
  31. testing error.
  32. "call" may bias profiling heavily towards areas with many function calls.
  33. Testing found a degenerate case giving a figure inaccurate by approximately
  34. 20,000%. (Yes, a multiple of 200.) This is, however, more directly comparable
  35. to common profilers (such as gprof) and also gives accurate function call
  36. counts, which cannot be retrieved from "time".
  37. I strongly recommend "time" mode, and it is now the default.
  38. == History ==
  39. 2008-09-16 - Time-based profiling and conversion to Lua 5.1
  40. by Ben Wilhelm ( [email protected] ).
  41. Added the ability to optionally choose profiling methods, along with a new
  42. profiling method.
  43. Converted to Lua 5, a few improvements, and
  44. additional documentation by Tom Spilman ( [email protected] )
  45. Additional corrections and tidying by original author
  46. Daniel Silverstone ( [email protected] )
  47. == Status ==
  48. Daniel Silverstone is no longer using this code, and judging by how long it's
  49. been waiting for Lua 5.1 support, I don't think Tom Spilman is either. I'm
  50. perfectly willing to take on maintenance, so if you have problems or
  51. questions, go ahead and email me :)
  52. -- Ben Wilhelm ( [email protected] ) '
  53. == Copyright ==
  54. Lua profiler - Copyright Pepperfish 2002,2003,2004
  55. Permission is hereby granted, free of charge, to any person obtaining a copy
  56. of this software and associated documentation files (the "Software"), to
  57. deal in the Software without restriction, including without limitation the
  58. rights to use, copy, modify, merge, publish, distribute, and/or sell copies
  59. of the Software, and to permit persons to whom the Software is furnished to
  60. do so, subject to the following conditions:
  61. The above copyright notice and this permission notice shall be included in
  62. all copies or substantial portions of the Software.
  63. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  64. IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  65. FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  66. AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  67. LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  68. FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  69. IN THE SOFTWARE.
  70. --]]
  71. --
  72. -- All profiler related stuff is stored in the top level table '_profiler'
  73. --
  74. _profiler = {}
  75. --
  76. -- newProfiler() creates a new profiler object for managing
  77. -- the profiler and storing state. Note that only one profiler
  78. -- object can be executing at one time.
  79. --
  80. function newProfiler(variant, sampledelay)
  81. if _profiler.running then
  82. print("Profiler already running.")
  83. return
  84. end
  85. variant = variant or "time"
  86. if variant ~= "time" and variant ~= "call" then
  87. print("Profiler method must be 'time' or 'call'.")
  88. return
  89. end
  90. local newprof = {}
  91. for k,v in pairs(_profiler) do
  92. newprof[k] = v
  93. end
  94. newprof.variant = variant
  95. newprof.sampledelay = sampledelay or 100000
  96. return newprof
  97. end
  98. --
  99. -- This function starts the profiler. It will do nothing
  100. -- if this (or any other) profiler is already running.
  101. --
  102. function _profiler.start(self)
  103. if _profiler.running then
  104. return
  105. end
  106. -- Start the profiler. This begins by setting up internal profiler state
  107. _profiler.running = self
  108. self.rawstats = {}
  109. self.callstack = {}
  110. if self.variant == "time" then
  111. self.lastclock = os.clock()
  112. debug.sethook( _profiler_hook_wrapper_by_time, "", self.sampledelay )
  113. elseif self.variant == "call" then
  114. debug.sethook( _profiler_hook_wrapper_by_call, "cr" )
  115. else
  116. print("Profiler method must be 'time' or 'call'.")
  117. sys.exit(1)
  118. end
  119. end
  120. --
  121. -- This function stops the profiler. It will do nothing
  122. -- if a profiler is not running, and nothing if it isn't
  123. -- the currently running profiler.
  124. --
  125. function _profiler.stop(self)
  126. if _profiler.running ~= self then
  127. return
  128. end
  129. -- Stop the profiler.
  130. debug.sethook( nil )
  131. _profiler.running = nil
  132. end
  133. --
  134. -- Simple wrapper to handle the hook. You should not
  135. -- be calling this directly. Duplicated to reduce overhead.
  136. --
  137. function _profiler_hook_wrapper_by_call(action)
  138. if _profiler.running == nil then
  139. debug.sethook( nil )
  140. end
  141. _profiler.running:_internal_profile_by_call(action)
  142. end
  143. function _profiler_hook_wrapper_by_time(action)
  144. if _profiler.running == nil then
  145. debug.sethook( nil )
  146. end
  147. _profiler.running:_internal_profile_by_time(action)
  148. end
  149. --
  150. -- This is the main by-function-call function of the profiler and should not
  151. -- be called except by the hook wrapper
  152. --
  153. function _profiler._internal_profile_by_call(self,action)
  154. -- Since we can obtain the 'function' for the item we've had call us, we
  155. -- can use that...
  156. local caller_info = debug.getinfo( 3 )
  157. if caller_info == nil then
  158. print "No caller_info"
  159. return
  160. end
  161. --SHG_LOG("[_profiler._internal_profile] "..(caller_info.name or "<nil>"))
  162. -- Retrieve the most recent activation record...
  163. local latest_ar = nil
  164. local callstackLen = #self.callstack
  165. if callstackLen > 0 then
  166. latest_ar = self.callstack[callstackLen]
  167. end
  168. -- Are we allowed to profile this function?
  169. local should_not_profile = 0
  170. for k,v in pairs(self.prevented_functions) do
  171. if k == caller_info.func then
  172. should_not_profile = v
  173. end
  174. end
  175. -- Also check the top activation record...
  176. if latest_ar then
  177. if latest_ar.should_not_profile == 2 then
  178. should_not_profile = 2
  179. end
  180. end
  181. -- Now then, are we in 'call' or 'return' ?
  182. -- print("Profile:", caller_info.name, "SNP:", should_not_profile,
  183. -- "Action:", action )
  184. if action == "call" then
  185. -- Making a call...
  186. local this_ar = {}
  187. this_ar.should_not_profile = should_not_profile
  188. this_ar.parent_ar = latest_ar
  189. this_ar.anon_child = 0
  190. this_ar.name_child = 0
  191. this_ar.children = {}
  192. this_ar.children_time = {}
  193. this_ar.clock_start = os.clock()
  194. -- Last thing to do on a call is to insert this onto the ar stack...
  195. table.insert( self.callstack, this_ar )
  196. else
  197. local this_ar = latest_ar
  198. if this_ar == nil then
  199. return -- No point in doing anything if no upper activation record
  200. end
  201. -- Right, calculate the time in this function...
  202. this_ar.clock_end = os.clock()
  203. this_ar.this_time = this_ar.clock_end - this_ar.clock_start
  204. -- Now, if we have a parent, update its call info...
  205. if this_ar.parent_ar then
  206. this_ar.parent_ar.children[caller_info.func] =
  207. (this_ar.parent_ar.children[caller_info.func] or 0) + 1
  208. this_ar.parent_ar.children_time[caller_info.func] =
  209. (this_ar.parent_ar.children_time[caller_info.func] or 0 ) +
  210. this_ar.this_time
  211. if caller_info.name == nil then
  212. this_ar.parent_ar.anon_child =
  213. this_ar.parent_ar.anon_child + this_ar.this_time
  214. else
  215. this_ar.parent_ar.name_child =
  216. this_ar.parent_ar.name_child + this_ar.this_time
  217. end
  218. end
  219. -- Now if we're meant to record information about ourselves, do so...
  220. if this_ar.should_not_profile == 0 then
  221. local inforec = self:_get_func_rec(caller_info.func,1)
  222. inforec.count = inforec.count + 1
  223. inforec.time = inforec.time + this_ar.this_time
  224. inforec.anon_child_time = inforec.anon_child_time + this_ar.anon_child
  225. inforec.name_child_time = inforec.name_child_time + this_ar.name_child
  226. inforec.func_info = caller_info
  227. for k,v in pairs(this_ar.children) do
  228. inforec.children[k] = (inforec.children[k] or 0) + v
  229. inforec.children_time[k] =
  230. (inforec.children_time[k] or 0) + this_ar.children_time[k]
  231. end
  232. end
  233. -- Last thing to do on return is to drop the last activation record...
  234. local callstackLen = #self.callstack
  235. table.remove( self.callstack, callstackLen )
  236. end
  237. end
  238. --
  239. -- This is the main by-time internal function of the profiler and should not
  240. -- be called except by the hook wrapper
  241. --
  242. function _profiler._internal_profile_by_time(self,action)
  243. -- we do this first so we add the minimum amount of extra time to this call
  244. local timetaken = os.clock() - self.lastclock
  245. local depth = 3
  246. local at_top = true
  247. local last_caller
  248. local caller = debug.getinfo(depth)
  249. while caller do
  250. if not caller.func then caller.func = "(tail call)" end
  251. if self.prevented_functions[caller.func] == nil then
  252. local info = self:_get_func_rec(caller.func, 1, caller)
  253. info.count = info.count + 1
  254. info.time = info.time + timetaken
  255. if last_caller then
  256. -- we're not the head, so update the "children" times also
  257. if last_caller.name then
  258. info.name_child_time = info.name_child_time + timetaken
  259. else
  260. info.anon_child_time = info.anon_child_time + timetaken
  261. end
  262. info.children[last_caller.func] =
  263. (info.children[last_caller.func] or 0) + 1
  264. info.children_time[last_caller.func] =
  265. (info.children_time[last_caller.func] or 0) + timetaken
  266. end
  267. end
  268. depth = depth + 1
  269. last_caller = caller
  270. caller = debug.getinfo(depth)
  271. end
  272. self.lastclock = os.clock()
  273. end
  274. --
  275. -- This returns a (possibly empty) function record for
  276. -- the specified function. It is for internal profiler use.
  277. --
  278. function _profiler._get_func_rec(self,func,force,info)
  279. -- Find the function ref for 'func' (if force and not present, create one)
  280. local ret = self.rawstats[func]
  281. if ret == nil and force ~= 1 then
  282. return nil
  283. end
  284. if ret == nil then
  285. -- Build a new function statistics table
  286. ret = {}
  287. ret.func = func
  288. ret.count = 0
  289. ret.time = 0
  290. ret.anon_child_time = 0
  291. ret.name_child_time = 0
  292. ret.children = {}
  293. ret.children_time = {}
  294. ret.func_info = info
  295. self.rawstats[func] = ret
  296. end
  297. return ret
  298. end
  299. --
  300. -- This writes a profile report to the output file object. If
  301. -- sort_by_total_time is nil or false the output is sorted by
  302. -- the function time minus the time in it's children.
  303. --
  304. function _profiler.report( self, outfile, sort_by_total_time )
  305. outfile:write
  306. [[Lua Profile output created by profiler.lua. Copyright Pepperfish 2002+
  307. ]]
  308. -- This is pretty awful.
  309. local terms = {}
  310. if self.variant == "time" then
  311. terms.capitalized = "Sample"
  312. terms.single = "sample"
  313. terms.pastverb = "sampled"
  314. elseif self.variant == "call" then
  315. terms.capitalized = "Call"
  316. terms.single = "call"
  317. terms.pastverb = "called"
  318. else
  319. assert(false)
  320. end
  321. local total_time = 0
  322. local ordering = {}
  323. for func,record in pairs(self.rawstats) do
  324. table.insert(ordering, func)
  325. end
  326. if sort_by_total_time then
  327. table.sort( ordering,
  328. function(a,b) return self.rawstats[a].time > self.rawstats[b].time end
  329. )
  330. else
  331. table.sort( ordering,
  332. function(a,b)
  333. local arec = self.rawstats[a]
  334. local brec = self.rawstats[b]
  335. local atime = arec.time - (arec.anon_child_time + arec.name_child_time)
  336. local btime = brec.time - (brec.anon_child_time + brec.name_child_time)
  337. return atime > btime
  338. end
  339. )
  340. end
  341. local len = #ordering
  342. for i=1,len do
  343. local func = ordering[i]
  344. local record = self.rawstats[func]
  345. local thisfuncname = " " .. self:_pretty_name(func) .. " "
  346. if string.len( thisfuncname ) < 42 then
  347. local nameLen = math.ceil((42 - string.len(thisfuncname))/2)
  348. thisfuncname =
  349. string.rep( "-", nameLen) .. thisfuncname
  350. thisfuncname =
  351. thisfuncname .. string.rep( "-", 42 - string.len(thisfuncname) )
  352. end
  353. total_time = total_time + ( record.time - ( record.anon_child_time +
  354. record.name_child_time ) )
  355. outfile:write( string.rep( "-", 19 ) .. thisfuncname ..
  356. string.rep( "-", 19 ) .. "\n" )
  357. outfile:write( terms.capitalized.." count: " ..
  358. string.format( "%4d", record.count ) .. "\n" )
  359. outfile:write( "Time spend total: " ..
  360. string.format( "%4.3f", record.time ) .. "s\n" )
  361. outfile:write( "Time spent in children: " ..
  362. string.format("%4.3f",record.anon_child_time+record.name_child_time) ..
  363. "s\n" )
  364. local timeinself =
  365. record.time - (record.anon_child_time + record.name_child_time)
  366. outfile:write( "Time spent in self: " ..
  367. string.format("%4.3f", timeinself) .. "s\n" )
  368. outfile:write( "Time spent per " .. terms.single .. ": " ..
  369. string.format("%4.5f", record.time/record.count) ..
  370. "s/" .. terms.single .. "\n" )
  371. outfile:write( "Time spent in self per "..terms.single..": " ..
  372. string.format( "%4.5f", timeinself/record.count ) .. "s/" ..
  373. terms.single.."\n" )
  374. -- Report on each child in the form
  375. -- Child <funcname> called n times and took a.bs
  376. local added_blank = 0
  377. for k,v in pairs(record.children) do
  378. if self.prevented_functions[k] == nil or
  379. self.prevented_functions[k] == 0
  380. then
  381. if added_blank == 0 then
  382. outfile:write( "\n" ) -- extra separation line
  383. added_blank = 1
  384. end
  385. outfile:write( "Child " .. self:_pretty_name(k) ..
  386. string.rep( " ", 41-string.len(self:_pretty_name(k)) ) .. " " ..
  387. terms.pastverb.." " .. string.format("%6d", v) )
  388. outfile:write( " times. Took " ..
  389. string.format("%4.2f", record.children_time[k] ) .. "s\n" )
  390. end
  391. end
  392. outfile:write( "\n" ) -- extra separation line
  393. outfile:flush()
  394. end
  395. outfile:write( "\n\n" )
  396. outfile:write( "Total time spent in profiled functions: " ..
  397. string.format("%5.3g",total_time) .. "s\n" )
  398. outfile:write( [[
  399. END
  400. ]] )
  401. outfile:flush()
  402. end
  403. --
  404. -- This writes the profile to the output file object as
  405. -- loadable Lua source.
  406. --
  407. function _profiler.lua_report(self,outfile)
  408. -- Purpose: Write out the entire raw state in a cross-referenceable form.
  409. local ordering = {}
  410. local functonum = {}
  411. for func,record in pairs(self.rawstats) do
  412. table.insert(ordering, func)
  413. local len = #ordering
  414. functonum[func] = len
  415. end
  416. outfile:write(
  417. "-- Profile generated by profiler.lua Copyright Pepperfish 2002+\n\n" )
  418. outfile:write( "-- Function names\nfuncnames = {}\n" )
  419. local len = #ordering
  420. for i=1,len do
  421. local thisfunc = ordering[i]
  422. outfile:write( "funcnames[" .. i .. "] = " ..
  423. string.format("%q", self:_pretty_name(thisfunc)) .. "\n" )
  424. end
  425. outfile:write( "\n" )
  426. outfile:write( "-- Function times\nfunctimes = {}\n" )
  427. local len = #ordering
  428. for i=1,len do
  429. local thisfunc = ordering[i]
  430. local record = self.rawstats[thisfunc]
  431. outfile:write( "functimes[" .. i .. "] = { " )
  432. outfile:write( "tot=" .. record.time .. ", " )
  433. outfile:write( "achild=" .. record.anon_child_time .. ", " )
  434. outfile:write( "nchild=" .. record.name_child_time .. ", " )
  435. outfile:write( "count=" .. record.count .. " }\n" )
  436. end
  437. outfile:write( "\n" )
  438. outfile:write( "-- Child links\nchildren = {}\n" )
  439. local len = #ordering
  440. for i=1,len do
  441. local thisfunc = ordering[i]
  442. local record = self.rawstats[thisfunc]
  443. outfile:write( "children[" .. i .. "] = { " )
  444. for k,v in pairs(record.children) do
  445. if functonum[k] then -- non-recorded functions will be ignored now
  446. outfile:write( functonum[k] .. ", " )
  447. end
  448. end
  449. outfile:write( "}\n" )
  450. end
  451. outfile:write( "\n" )
  452. outfile:write( "-- Child call counts\nchildcounts = {}\n" )
  453. local len = #ordering
  454. for i=1,len do
  455. local thisfunc = ordering[i]
  456. local record = self.rawstats[thisfunc]
  457. outfile:write( "children[" .. i .. "] = { " )
  458. for k,v in record.children do
  459. if functonum[k] then -- non-recorded functions will be ignored now
  460. outfile:write( v .. ", " )
  461. end
  462. end
  463. outfile:write( "}\n" )
  464. end
  465. outfile:write( "\n" )
  466. outfile:write( "-- Child call time\nchildtimes = {}\n" )
  467. local len = #ordering
  468. for i=1,len do
  469. local thisfunc = ordering[i]
  470. local record = self.rawstats[thisfunc];
  471. outfile:write( "children[" .. i .. "] = { " )
  472. for k,v in pairs(record.children) do
  473. if functonum[k] then -- non-recorded functions will be ignored now
  474. outfile:write( record.children_time[k] .. ", " )
  475. end
  476. end
  477. outfile:write( "}\n" )
  478. end
  479. outfile:write( "\n\n-- That is all.\n\n" )
  480. outfile:flush()
  481. end
  482. -- Internal function to calculate a pretty name for the profile output
  483. function _profiler._pretty_name(self,func)
  484. -- Only the data collected during the actual
  485. -- run seems to be correct.... why?
  486. local info = self.rawstats[ func ].func_info
  487. -- local info = debug.getinfo( func )
  488. local name = ""
  489. if info.what == "Lua" then
  490. name = "L:"
  491. end
  492. if info.what == "C" then
  493. name = "C:"
  494. end
  495. if info.what == "main" then
  496. name = " :"
  497. end
  498. if info.name == nil then
  499. name = name .. "<"..tostring(func) .. ">"
  500. else
  501. name = name .. info.name
  502. end
  503. if info.source then
  504. name = name .. "@" .. info.source
  505. else
  506. if info.what == "C" then
  507. name = name .. "@?"
  508. else
  509. name = name .. "@<string>"
  510. end
  511. end
  512. name = name .. ":"
  513. if info.what == "C" then
  514. name = name .. "?"
  515. else
  516. name = name .. info.linedefined
  517. end
  518. return name
  519. end
  520. --
  521. -- This allows you to specify functions which you do
  522. -- not want profiled. Setting level to 1 keeps the
  523. -- function from being profiled. Setting level to 2
  524. -- keeps both the function and its children from
  525. -- being profiled.
  526. --
  527. -- BUG: 2 will probably act exactly like 1 in "time" mode.
  528. -- If anyone cares, let me (zorba) know and it can be fixed.
  529. --
  530. function _profiler.prevent(self, func, level)
  531. self.prevented_functions[func] = (level or 1)
  532. end
  533. _profiler.prevented_functions = {
  534. [_profiler.start] = 2,
  535. [_profiler.stop] = 2,
  536. [_profiler._internal_profile_by_time] = 2,
  537. [_profiler._internal_profile_by_call] = 2,
  538. [_profiler_hook_wrapper_by_time] = 2,
  539. [_profiler_hook_wrapper_by_call] = 2,
  540. [_profiler.prevent] = 2,
  541. [_profiler._get_func_rec] = 2,
  542. [_profiler.report] = 2,
  543. [_profiler.lua_report] = 2,
  544. [_profiler._pretty_name] = 2
  545. }