# Debugging Our bug: c-crash.R ## See http://stackoverflow.com/questions/15816532/ ## C crash in R-2.15.3, fixed in R-3.0.* x <- structure(4:6, .Label = LETTERS[1:3], class="factor") write.table(x, tempfile()) Preparation - compile with flags -ggdb -O0 Inspect write.table: last line enters C code .Internal(write.table(x, file, nrow(x), p, rnames, sep, eol, na, dec, as.integer(quote), qmethod != "double")) Reproduce: $ ~/bin/R-2-15-branch/bin/R -f c-crash.R Run under 'gdb' debugger $ ~/bin/R-2-15-branch/bin/R -d gdb -f c-crash.R (gdb) run leads to > x <- structure(4:6, .Label = LETTERS[1:3], class="factor") > write.table(x, tempfile()) Program received signal SIGSEGV, Segmentation fault. 0x00007ffff7922920 in Rf_getCharCE (x=0x60000010) at /home/mtmorgan/src/R-2-15-branch/src/main/sysutils.c:724 724 if(TYPEOF(x) != CHARSXP) (gdb) Show listing (list, or just l) in vicinity of crash (gdb) l 719 return ans; 720 } 721 722 cetype_t getCharCE(SEXP x) 723 { 724 if(TYPEOF(x) != CHARSXP) 725 error(_("'%s' must be called on a CHARSXP"), "getCharCE"); 726 if(IS_UTF8(x)) return CE_UTF8; 727 else if(IS_LATIN1(x)) return CE_LATIN1; 728 else if(IS_BYTES(x)) return CE_BYTES; Look at the call stack (backtrace, bt) (gdb) bt #0 0x00007ffff7922920 in Rf_getCharCE (x=0x60000010) at /home/mtmorgan/src/R-2-15-branch/src/main/sysutils.c:724 #1 0x00007ffff7920ae7 in Rf_translateChar (x=0x60000010) at /home/mtmorgan/src/R-2-15-branch/src/main/sysutils.c:784 #2 0x00007ffff78f6789 in EncodeElement2 (x=0x193a440, indx=4, quote=TRUE, qmethod=TRUE, buff=0x7fffffffc2f0, cdec=46 '.') at /home/mtmorgan/src/R-2-15-branch/src/main/scan.c:1658 #3 0x00007ffff78f633d in do_writetable (call=0xdadba8, op=0x637a58, args=0xd3e190, rho=0xdacd08) at /home/mtmorgan/src/R-2-15-branch/src/main/scan.c:1789 #4 0x00007ffff787b762 in do_internal (call=0xdadb00, op=0x6113b0, args=0xd3f0a0, env=0xdacd08) Move up call stack 2, show listing (gdb) up 2 #2 0x00007ffff78f6789 in EncodeElement2 (x=0x193a440, indx=4, quote=TRUE, qmethod=TRUE, buff=0x7fffffffc2f0, cdec=46 '.') at /home/mtmorgan/src/R-2-15-branch/src/main/scan.c:1658 1658 p0 = translateChar(STRING_ELT(x, indx)); (gdb) l 1653 int nbuf; 1654 char *q; 1655 const char *p, *p0; 1656 1657 if(TYPEOF(x) == STRSXP) { 1658 p0 = translateChar(STRING_ELT(x, indx)); 1659 if(!quote) return p0; 1660 for(nbuf = 2, p = p0; *p; p++) /* find buffer length needed */ 1661 nbuf += (*p == '"') ? 2 : 1; 1662 R_AllocStringBuffer(nbuf, buff); Print indx, x, and call the Rf_PrintValue function to see the R representation of x (gdb) p indx $1 = 4 (gdb) p x $2 = (SEXP) 0x193a440 (gdb) call Rf_PrintValue(x) [1] "A" "B" "C" Weird! STRING_ELT(x, indx) is trying to access the 5th element (C is 0-based) of a character vector of length 3! One more level up shows that R is treating the integer codings as though they are correct (gdb) up #3 0x00007ffff78f633d in do_writetable (call=0xdadba8, op=0x637a58, args=0xd3e190, rho=0xdacd08) at /home/mtmorgan/src/R-2-15-branch/src/main/scan.c:1789 1789 tmp = EncodeElement2(levels[j], INTEGER(xj)[i] - 1, (gdb) call Rf_PrintValue(xj) [1] Levels: A B C Fix? Actual fix in R-3.0.1: > x <- structure(4:6, .Label = LETTERS[1:3], class="factor") > write.table(x, tempfile()) Error in .External2(C_writetable, x, file, nrow(x), p, rnames, sep, eol, : index out of range One way to discover fix: break on error, then move up call stack > ~/bin/R-devel/bin/R -d gdb -f c-crash.R (gdb) break Rf_error (gdb) run > x <- structure(4:6, .Label = LETTERS[1:3], class="factor") > write.table(x, tempfile()) Breakpoint 1, Rf_error (format=0x7ffff510787c "index out of range") at /home/mtmorgan/src/R-3-0-branch/src/main/errors.c:751 751 { (gdb) up #1 0x00007ffff5101d5d in EncodeElement2 (x=0x68ed18, indx=3, quote=TRUE, qmethod=TRUE, buff=, cdec=) at /home/mtmorgan/src/R-3-0-branch/src/library/utils/src/io.c:955 955 error(_("index out of range")); (gdb) l 950 int nbuf; 951 char *q; 952 const char *p, *p0; 953 954 if (indx < 0 || indx >= length(x)) 955 error(_("index out of range")); 956 if(TYPEOF(x) == STRSXP) { 957 p0 = translateChar(STRING_ELT(x, indx)); 958 if(!quote) return p0; 959 for(nbuf = 2, p = p0; *p; p++) /* find buffer length needed */ Who / when fixed? $ cd ~/src/R-devel $ svn blame src/library/utils/src/io.c | less ... 62707 murdoch if (indx < 0 || indx >= length(x)) 62707 murdoch error(_("index out of range")); ... $ svn log -r62707 ------------------------------------------------------------------------ r62707 | murdoch | 2013-05-02 11:23:36 -0700 (Thu, 02 May 2013) | 1 line Fix for PR#15300 ------------------------------------------------------------------------ R bug report: https://bugs.r-project.org/bugzilla3/show_bug.cgi?id=15300 # valgind: memory corruption c-valgrind.c: #include SEXP c_off_by_one(SEXP x) { for (int i = 1; i < Rf_length(x) + 1; ++i) /* oops, C uses 0-based indexing */ REAL(x)[i] = i * i; return x; } c-valgrind.R dyn.load("c-valgrind.so") .Call("c_off_by_one", numeric(100)) Compiling and evaluating $ R CMD SHLIB c-valgrind.c clang -I/home/mtmorgan/bin/R-3-0-branch/include -DNDEBUG -I/usr/local/include -fpic -ggdb -O0 -Wall -pedantic -c c-valgrind.c -o c-valgrind.o clang -shared -L/usr/local/lib -o c-valgrind.so c-valgrind.o -L/home/mtmorgan/bin/R-3-0-branch/lib -lR $ R --vanilla -f c-valgrind.R > dyn.load("c-valgrind.so") > .Call("c_off_by_one", numeric(100)) [1] 0 1 4 9 16 25 36 49 64 81 100 121 144 169 196 [16] 225 256 289 324 361 400 441 484 529 576 625 676 729 784 841 [31] 900 961 1024 1089 1156 1225 1296 1369 1444 1521 1600 1681 1764 1849 1936 [46] 2025 2116 2209 2304 2401 2500 2601 2704 2809 2916 3025 3136 3249 3364 3481 [61] 3600 3721 3844 3969 4096 4225 4356 4489 4624 4761 4900 5041 5184 5329 5476 [76] 5625 5776 5929 6084 6241 6400 6561 6724 6889 7056 7225 7396 7569 7744 7921 [91] 8100 8281 8464 8649 8836 9025 9216 9409 9604 9801 > Symptom - Everything often seems fine - Occaisionally observe a segfault, e.g., some platforms or after some specific actions Investigate with valgrind - slow, so need to have a quickly (seconds) reproducible example $ R --vanilla -d valgrind -f c-valgrind.R ==1128== Memcheck, a memory error detector ==1128== Copyright (C) 2002-2011, and GNU GPL'd, by Julian Seward et al. ==1128== Using Valgrind-3.7.0 and LibVEX; rerun with -h for copyright info ==1128== Command: /home/mtmorgan/bin/R-3-0-branch/bin/exec/R --vanilla -f c-valgrind.R > dyn.load("c-valgrind.so") > .Call("c_off_by_one", numeric(100)) ==1128== Invalid write of size 8 ==1128== at 0x102D666C: c_off_by_one (c-valgrind.c:7) ==1128== by 0x4EC372B: do_dotcall (dotcode.c:579) ==1128== by 0x4EFFF99: Rf_eval (eval.c:635) ==1128== by 0x4F238AC: Rf_ReplIteration (main.c:258) ==1128== by 0x4F23B2F: R_ReplConsole (main.c:307) ==1128== by 0x4F2404F: run_Rmainloop (main.c:986) ==1128== by 0x400845: main (Rmain.c:32) ==1128== Address 0x10149248 is 0 bytes after a block of size 840 alloc'd ==1128== at 0x4C2B6CD: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==1128== by 0x4F2BF25: Rf_allocVector (memory.c:2452) ==1128== by 0x4E8D9A4: do_makevector (builtin.c:749) ==1128== by 0x4F2D5EA: do_internal (names.c:1170) ==1128== by 0x4EFFD9B: Rf_eval (eval.c:607) ==1128== by 0x4F03EBF: Rf_applyClosure (eval.c:1003) ==1128== by 0x4EFFAF4: Rf_eval (eval.c:654) ==1128== by 0x4F014AA: Rf_evalList (eval.c:1985) ==1128== by 0x4EFFE74: Rf_eval (eval.c:626) ==1128== by 0x4F238AC: Rf_ReplIteration (main.c:258) ==1128== by 0x4F23B2F: R_ReplConsole (main.c:307) ==1128== by 0x4F2404F: run_Rmainloop (main.c:986) ==1128== [1] 0 1 4 9 16 25 36 49 64 81 100 121 144 169 196 [16] 225 256 289 324 361 400 441 484 529 576 625 676 729 784 841 [31] 900 961 1024 1089 1156 1225 1296 1369 1444 1521 1600 1681 1764 1849 1936 [46] 2025 2116 2209 2304 2401 2500 2601 2704 2809 2916 3025 3136 3249 3364 3481 [61] 3600 3721 3844 3969 4096 4225 4356 4489 4624 4761 4900 5041 5184 5329 5476 [76] 5625 5776 5929 6084 6241 6400 6561 6724 6889 7056 7225 7396 7569 7744 7921 [91] 8100 8281 8464 8649 8836 9025 9216 9409 9604 9801 > ==1128== ==1128== HEAP SUMMARY: ==1128== in use at exit: 108,407,757 bytes in 20,020 blocks ==1128== total heap usage: 32,189 allocs, 12,169 frees, 117,878,550 bytes allocated ==1128== ==1128== LEAK SUMMARY: ==1128== definitely lost: 120 bytes in 2 blocks ==1128== indirectly lost: 480 bytes in 20 blocks ==1128== possibly lost: 0 bytes in 0 blocks ==1128== still reachable: 108,407,157 bytes in 19,998 blocks ==1128== suppressed: 0 bytes in 0 blocks ==1128== Rerun with --leak-check=full to see details of leaked memory ==1128== ==1128== For counts of detected and suppressed errors, rerun with: -v ==1128== Use --track-origins=yes to see where uninitialised values come from ==1128== ERROR SUMMARY: 19 errors from 4 contexts (suppressed: 2 from 2) # Profiling with operf Console 1: Where does findOverlaps spend its time? library(IRanges) ir <- IRanges(sample(100000), width=100) system.time(result <- findOverlaps(ir)) ## user system elapsed ## 4.380 0.236 4.623 Console 2: Find and attach to process $ ps -u mtmorgan|grep R $ operf --pid 1494 operf: Profiler started operf: Press Ctl-c or 'kill -SIGINT 1532' to stop profiling Return to console 1, run code, back to console 2, press ctrl-c $ opannotate --source ~/R/x86_64-unknown-linux-gnu-library/3.0-2.13/IRanges/libs/IRanges.so | less /* * Command line: opannotate --source /home/mtmorgan/R/x86_64-unknown-linux-gnu-library/3.0-2.13/IRanges/libs/IRanges.so * * Interpretation of command line: * Output annotated source file with samples * Output all files * * CPU: Intel Sandy Bridge microarchitecture, speed 2.401e+06 MHz (estimated) * Counted CPU_CLK_UNHALTED events (Clock cycles when not halted) with a unit mask of 0x00 (No unit mask) count 100000 */ files organized by most-frequently counted; 78% of samples from sort_utils.c /* * Total samples for file : "/home/mtmorgan/b/Rpacks/IRanges/src/sort_utils.c" * * 132386 78.6177 */ 34% of time in compar_aabb :static int compar_aabb(int i1, int i2) 4428 2.6296 :{ /* compar_aabb total: 57579 34.1934 */ : int ret; : 24601 14.6094 : ret = aa[i1] - aa[i2]; 12209 7.2503 : if (ret != 0) 2625 1.5589 : return ret; 2959 1.7572 : ret = bb[i1] - bb[i2]; 1455 0.8641 : return ret; 9302 5.5240 :} Times might change substantially when compiled with optimizations - operf tries to manage this