world leader in high performance signal processing
Trace:

Differences

This shows you the differences between two versions of the page.

kernel_profiling [2007/04/20 16:42] (current)
mmh created
Line 1: Line 1:
 +==== Kernel Profiling ====
  
 +This is the kernel level tool which statically tracks the performance
 +of the kernel.  I have modified it so that we can at least get useful
 +information out of it.  Basically, the kernel has some built in
 +profiling available which is enabled via the boot arguments to the
 +kernel.  When the kernel is booted with profile=2 the kernel profiles
 +itself creating a /proc/profile which contains a histogram of the
 +kernel execution footprint. Where 2 is used to reduce the size of the
 +static table maintained by the kernel by a factor of 4 which seems
 +reasonable to me.
 +
 +The last entry of the /proc/profile represents the rest of the running
 +world and I have pulled it out of the readprofile as a separate entity
 +because it doesn't contain any valuable insight accept how much of a
 +percentage of time your spending in the kernel.
 +
 +right after boot!
 +
 +<code>
 +root:~> cp /u/readprofile /bin
 +root:~> readprofile 
 +  1%      4 __stext                                     0.0112
 +  7%     20 _calibrate_delay                            0.0537
 +  3%      9 _blackfin_icache_dcache_flush_range         0.3214
 +---------------------------------------------------------------------------
 +         35 total                                       0.0009
 +userland and other is 3923/3958 ~ 99% non kernel
 +
 +</code>
 +
 +<code>
 +root:~> /u/mp/mplayer /u/vid/sman.mp4  # QVGA video displayed on /dev/fb w/NFS
 +</code>
 +
 +... << snip >>
 +
 +<code>
 +root:~> readprofile 
 +  2%     20 _calibrate_delay                            0.0537
 + 20%    143 _safe_dma_memcpy                            3.2500
 +  1%     10 _blackfin_icache_dcache_flush_range         0.3571
 +  1%     11 _blackfin_dflush_page                       0.5500
 +---------------------------------------------------------------------------
 +        192 total                                       0.0051
 +userland and other is 23927/24119 ~ 99% non kernel
 +</code>
 +
 +Not bad not bad at all so thats 1% of the time spent in the static
 +region of the kernel i.e. all that maps /proc/kallsyms.  BTW this
 +makes sense to me I put a call to dma_memcpy in the vo_fbdev.c and it
 +gets called a lot.
 +
 +The patch contains:
 +
 +1. adjustment to where one fines the symbol table on embedded machine.
 +2. rename \_stext to \_\_stext
 +3. special handling of the statistics associated with the last bin of the histogram.
 +
 +<code>
 +Index: readprofile.c
 +===================================================================
 +--- readprofile.c (revision 5180)
 ++++ readprofile.c (working copy)
 +@@ -36,7 +36,7 @@
 + 
 + /* These are the defaults */
 + #ifdef EMBED
 +-static char defaultmap[]="/System.map";
 ++static char defaultmap[]="/proc/kallsyms";
 + #else
 + static char defaultmap[]="/usr/src/linux/System.map";
 + #endif
 +@@ -88,6 +88,7 @@
 + char mapline[S_LEN];
 + int maplineno=1;
 + int popenMap;   /* flag to tell if popen() has been used */
 ++int ulandcnt;
 + 
 + #define next (current^1)
 + 
 +@@ -143,9 +144,13 @@
 +     }
 +   close(proFd);
 + 
 ++  len = len - sizeof(int); // this is the last sample address it collects the world
 ++
 +   for (index = 0, totalticks = 0; (index < len/sizeof(int)); index++)
 +  totalticks += buf[index];
 + 
 ++  ulandcnt = buf[index];
 ++
 +   index = 0;
 +   step=buf[0];
 +   if (optInfo)
 +@@ -167,7 +172,7 @@
 +        prgname,mapFile, maplineno);
 +       exit(1);
 +       }
 +-    if (strcmp(fn_name,"_stext") == 0) /* only elf works like this */
 ++    if (strcmp(fn_name,"__stext") == 0) /* only elf works like this */
 +       {
 +       add0=fn_add;
 +       break;
 +@@ -176,7 +181,7 @@
 + 
 +   if (!add0)
 +     {
 +-    fprintf(stderr,"%s: can't find \"_stext\" in %s\n",prgname, mapFile);
 ++    fprintf(stderr,"%s: can't find \"__stext\" in %s\n",prgname, mapFile);
 +     exit(1);
 +     }
 + 
 +@@ -225,6 +230,10 @@
 +     printf("     %6i %-36s %8d.%04d\n",
 +     total,"total",
 +     (total / (fn_add-add0)), ((total*10000)/(fn_add-add0))%10000 );
 ++
 ++  total += ulandcnt;
 ++  printf ("userland and other is %d/%d ~ %2d%% non kernel\n", ulandcnt, total, 
 ++   (ulandcnt*100)/total);
 + 
 +   popenMap ? pclose(map) : fclose(map);
 +   exit(0);
 +</code>