Kernel Profiling This is the kernel level tool which statically tracks the performance of the kernel. I have modified it so that we can at least get useful information out of it. Basically, the kernel has some built in profiling available which is enabled via the boot arguments to the kernel. When the kernel is booted with profile=2 the kernel profiles itself creating a /proc/profile which contains a histogram of the kernel execution footprint. Where 2 is used to reduce the size of the static table maintained by the kernel by a factor of 4 which seems reasonable to me. The last entry of the /proc/profile represents the rest of the running world and I have pulled it out of the readprofile as a separate entity because it doesn't contain any valuable insight accept how much of a percentage of time your spending in the kernel. right after boot! root:~> cp /u/readprofile /bin root:~> readprofile 1% 4 __stext 0.0112 7% 20 _calibrate_delay 0.0537 3% 9 _blackfin_icache_dcache_flush_range 0.3214 --------------------------------------------------------------------------- 35 total 0.0009 userland and other is 3923/3958 ~ 99% non kernel root:~> /u/mp/mplayer /u/vid/sman.mp4 # QVGA video displayed on /dev/fb w/NFS … « snip » root:~> readprofile 2% 20 _calibrate_delay 0.0537 20% 143 _safe_dma_memcpy 3.2500 1% 10 _blackfin_icache_dcache_flush_range 0.3571 1% 11 _blackfin_dflush_page 0.5500 --------------------------------------------------------------------------- 192 total 0.0051 userland and other is 23927/24119 ~ 99% non kernel Not bad not bad at all so thats 1% of the time spent in the static region of the kernel i.e. (in other words) all that maps /proc/kallsyms. BTW (By the way) this makes sense to me I put a call to dma_memcpy in the vo_fbdev.c and it gets called a lot. The patch contains: 1. adjustment to where one fines the symbol table on embedded machine. 2. rename \_stext to \_\_stext 3. special handling of the statistics associated with the last bin of the histogram. Index: readprofile.c =================================================================== --- readprofile.c (revision 5180) +++ readprofile.c (working copy) @@ -36,7 +36,7 @@ /* These are the defaults */ #ifdef EMBED -static char defaultmap[]="/System.map"; +static char defaultmap[]="/proc/kallsyms"; #else static char defaultmap[]="/usr/src/linux/System.map"; #endif @@ -88,6 +88,7 @@ char mapline[S_LEN]; int maplineno=1; int popenMap; /* flag to tell if popen() has been used */ +int ulandcnt; #define next (current^1) @@ -143,9 +144,13 @@ } close(proFd); + len = len - sizeof(int); // this is the last sample address it collects the world + for (index = 0, totalticks = 0; (index < len/sizeof(int)); index++) totalticks += buf[index]; + ulandcnt = buf[index]; + index = 0; step=buf[0]; if (optInfo) @@ -167,7 +172,7 @@ prgname,mapFile, maplineno); exit(1); } - if (strcmp(fn_name,"_stext") == 0) /* only elf works like this */ + if (strcmp(fn_name,"__stext") == 0) /* only elf works like this */ { add0=fn_add; break; @@ -176,7 +181,7 @@ if (!add0) { - fprintf(stderr,"%s: can't find \"_stext\" in %s\n",prgname, mapFile); + fprintf(stderr,"%s: can't find \"__stext\" in %s\n",prgname, mapFile); exit(1); } @@ -225,6 +230,10 @@ printf(" %6i %-36s %8d.%04d\n", total,"total", (total / (fn_add-add0)), ((total*10000)/(fn_add-add0))%10000 ); + + total += ulandcnt; + printf ("userland and other is %d/%d ~ %2d%% non kernel\n", ulandcnt, total, + (ulandcnt*100)/total); popenMap ? pclose(map) : fclose(map); exit(0);