diff --git a/common/csr.h b/common/csr.h
index 62086331f2f0dc458f14f87777dea3683978f8a1..6c919f8a7622fff6c19fa74bf93d2938393097b3 100644
--- a/common/csr.h
+++ b/common/csr.h
@@ -7,14 +7,14 @@ csv zero = 0.0;
 #define CS_MAX(a,b) (((a) > (b)) ? (a) : (b))
 
 typedef struct csr_t {
-    csi  m; 
-    csi  n; 
-    csi  nzmax;
-    csi  nr; 
-    csi* r; 
-    csi* p; 
-    csi* j; 
-    csv* x; 
+    csi m;
+    csi n;
+    csi nzmax;
+    csi nr;
+    csi* r;
+    csi* p;
+    csi* j;
+    csv* x;
 } csr;
 /* method declarations {{{*/
 csr *csr_spfree(csr *A);
@@ -24,14 +24,14 @@ csr *csr_spfree(csr *A);
 
 /* free workspace and return a sparse matrix result */
 csr *csr_done(csr *C, void *w, void *x, csi ok) {
-    return(ok ? C : csr_spfree(C)); /* return result if OK, else free it */
+    return (ok ? C : csr_spfree(C)); /* return result if OK, else free it */
 }
 
 /* wrapper for free */
 void *cs_free(void *p) {
     if (p)
         free(p); /* free p if it is not already NULL */
-    return(NULL); /* return NULL to simplify the use of cs_free */
+    return (NULL); /* return NULL to simplify the use of cs_free */
 }
 
 /* wrapper for realloc */
@@ -40,9 +40,9 @@ void *csr_realloc(void *p, csi n, size_t size, csi *ok) {
     pnew = realloc(p, CS_MAX(n, 1) * size); /* realloc the block */
     *ok = (pnew != NULL); /* realloc fails if pnew is NULL */
     if (pnew == NULL) {
-            printf("%d:reallocation failed, pnew is NULL\n", __LINE__);
+        printf("%d:reallocation failed, pnew is NULL\n", __LINE__);
     }
-    return((*ok) ? pnew : p); /* return original p if failure */
+    return ((*ok) ? pnew : p); /* return original p if failure */
 }
 
 /* wrapper for realloc */
@@ -51,31 +51,31 @@ void *cs_realloc(void *p, csi n, size_t size, csi *ok) {
     pnew = realloc(p, CS_MAX(n, 1) * size); /* realloc the block */
     *ok = (pnew != NULL); /* realloc fails if pnew is NULL */
     if (pnew == NULL) {
-            printf("reallocation failed\n");
+        printf("reallocation failed\n");
     }
-    return((*ok) ? pnew : p); /* return original p if failure */
+    return ((*ok) ? pnew : p); /* return original p if failure */
 }
 
 /* change the max # of entries sparse matrix */
 csi csr_sprealloc(csr *A, csi nzmax) {
     csi ok, oki = 0, okj = 1, okx = 1;
     if (!A)
-            return(0);
+        return (0);
     if (nzmax <= 0)
-            nzmax = A->p[A->m];
-    A->j = (int*)csr_realloc(A->j, nzmax, sizeof(csi), &oki);
+        nzmax = A->p[A->m];
+    A->j = (int*) csr_realloc(A->j, nzmax, sizeof (csi), &oki);
     if (A->x)
-            A->x = (csv*)csr_realloc(A->x, nzmax, sizeof(csv), &okx);
+        A->x = (csv*) csr_realloc(A->x, nzmax, sizeof (csv), &okx);
     ok = (oki && okj && okx);
     if (ok)
-            A->nzmax = nzmax;
-    return(ok);
+        A->nzmax = nzmax;
+    return (ok);
 }
 
 /* free a sparse matrix */
 csr *csr_spfree(csr *A) {
     if (!A)
-            return(NULL); /* do nothing if A already NULL */
+        return (NULL); /* do nothing if A already NULL */
     cs_free(A->p);
     A->p = NULL;
     cs_free(A->j);
@@ -90,30 +90,30 @@ csr *csr_spfree(csr *A) {
 
 /* allocate a sparse matrix (triplet form or compressed-ROW form) */
 csr *csr_spalloc(csi m, csi n, csi nzmax, int values, int triplet, csv f) {
-    csr* A = (csr*)calloc(1, sizeof(csr)); /* allocate the cs struct */
+    csr* A = (csr*) calloc(1, sizeof (csr)); /* allocate the cs struct */
     if (!A) {
-            perror("sparse allocation failed");
-            return(NULL); /* out of memory */
+        perror("sparse allocation failed");
+        return (NULL); /* out of memory */
     }
     A->m = m; /* define dimensions and nzmax */
     A->n = n;
     A->nzmax = nzmax = CS_MAX(nzmax, 0);
-    A->nr = 0;  // number of nonzero rows
-    A->p = (csi*)calloc(m + 2, sizeof(csi));
-    A->j = (csi*)calloc(CS_MAX(nzmax,1), sizeof(csi));
-    A->x = (csv*)calloc(CS_MAX(nzmax,1), sizeof(csv));
-    return((!A->p || !A->j || !A->x) ? csr_spfree(A) : A);
+    A->nr = 0; // number of nonzero rows
+    A->p = (csi*) calloc(m + 2, sizeof (csi));
+    A->j = (csi*) calloc(CS_MAX(nzmax, 1), sizeof (csi));
+    A->x = (csv*) calloc(CS_MAX(nzmax, 1), sizeof (csv));
+    return ((!A->p || !A->j || !A->x) ? csr_spfree(A) : A);
 }/*}}}*/
 
 /** Multiply two sparse matrices which are stored in CSR format. MKL is used */
 csr *csr_multiply(csi Am, csi An, csi Anzmax, const csi* Ap, const csi* Aj, const csv* Ax, csi Bm, csi Bn, csi Bnzmax, const csi* Bp, const csi* Bj, const csv* Bx, long* nummult, csi* xb, csv* x) { /*{{{*/
     csv tf = 0;
     csi p, jp, j, kp, k, i, nz = 0, anz, *Cp, *Cj, m, n,
-        bnz, values = 1;
+            bnz, values = 1;
     csv *Cx;
     csr *C;
     if (An != Bm)
-        return(NULL);
+        return (NULL);
     if (Anzmax == 0 || Bnzmax == 0) {
         C = csr_spalloc(Am, Bn, 0, values, 0, tf);
         return C;
@@ -122,8 +122,8 @@ csr *csr_multiply(csi Am, csi An, csi Anzmax, const csi* Ap, const csi* Aj, cons
     anz = Ap[Am];
     n = Bn;
     bnz = Bp[Bm];
-    for(i = 0; i < n; i++) xb[i] = 0;
-    for(i = 0; i < n; i++)
+    for (i = 0; i < n; i++) xb[i] = 0;
+    for (i = 0; i < n; i++)
         xb[i] = 0;
     values = (Ax != NULL) && (Bx != NULL);
     csi tnz = (anz + bnz) * 2;
@@ -132,8 +132,8 @@ csr *csr_multiply(csi Am, csi An, csi Anzmax, const csi* Ap, const csi* Aj, cons
         return (csr_done(C, xb, x, 0));
     Cp = C->p;
     for (i = 0; i < m; i++) {
-        if ( ( (nz + n) > C->nzmax ) ) {
-            if(!csr_sprealloc(C, (2 * (C->nzmax) + n) ) ) {
+        if (((nz + n) > C->nzmax)) {
+            if (!csr_sprealloc(C, (2 * (C->nzmax) + n))) {
                 return (csr_done(C, xb, x, 0)); // out of memory
             } else {
             }
diff --git a/common/mmio.h b/common/mmio.h
index d306433fa92f8cce79d55bd625092e4c20b4b11b..196240e2559c8b74c15939ed26366defcb797c83 100644
--- a/common/mmio.h
+++ b/common/mmio.h
@@ -1,10 +1,10 @@
-/* 
-*   Matrix Market I/O library for ANSI C
-*
-*   See http://math.nist.gov/MatrixMarket for details.
-*
-*
-*/
+/*
+ *   Matrix Market I/O library for ANSI C
+ *
+ *   See http://math.nist.gov/MatrixMarket for details.
+ *
+ *
+ */
 
 #ifndef MM_IO_H
 #define MM_IO_H
@@ -45,7 +45,7 @@ int mm_write_mtx_array_size(FILE *f, int M, int N);
 #define mm_is_skew(typecode)	((typecode)[3]=='K')
 #define mm_is_hermitian(typecode)((typecode)[3]=='H')
 
-int mm_is_valid(MM_typecode matcode);		/* too complex for a macro */
+int mm_is_valid(MM_typecode matcode); /* too complex for a macro */
 
 
 /********************* MM_typecode modify fucntions ***************************/
@@ -89,22 +89,22 @@ int mm_is_valid(MM_typecode matcode);		/* too complex for a macro */
 
    MM_matrix_typecode: 4-character sequence
 
-				    ojbect 		sparse/   	data        storage 
-						  		dense     	type        scheme
+                                    ojbect 		sparse/   	data        storage
+                                                                dense     	type        scheme
 
    string position:	 [0]        [1]			[2]         [3]
 
    Matrix typecode:  M(atrix)  C(oord)		R(eal)   	G(eneral)
-						        A(array)	C(omplex)   H(ermitian)
-											P(attern)   S(ymmetric)
-								    		I(nteger)	K(kew)
+                                                        A(array)	C(omplex)   H(ermitian)
+                                                                                        P(attern)   S(ymmetric)
+                                                                                I(nteger)	K(kew)
 
  ***********************************************************************/
 
 #define MM_MTX_STR		"matrix"
 #define MM_ARRAY_STR	"array"
 #define MM_DENSE_STR	"array"
-#define MM_COORDINATE_STR "coordinate" 
+#define MM_COORDINATE_STR "coordinate"
 #define MM_SPARSE_STR	"coordinate"
 #define MM_COMPLEX_STR	"complex"
 #define MM_REAL_STR		"real"
@@ -119,14 +119,14 @@ int mm_is_valid(MM_typecode matcode);		/* too complex for a macro */
 /*  high level routines */
 
 int mm_write_mtx_crd(char fname[], int M, int N, int nz, int I[], int J[],
-		 double val[], MM_typecode matcode);
+        double val[], MM_typecode matcode);
 int mm_read_mtx_crd_data(FILE *f, int M, int N, int nz, int I[], int J[],
-		double val[], MM_typecode matcode);
+        double val[], MM_typecode matcode);
 int mm_read_mtx_crd_entry(FILE *f, int *I, int *J, double *real, double *img,
-			MM_typecode matcode);
+        MM_typecode matcode);
 
 int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_,
-                double **val_, int **I_, int **J_);
+        double **val_, int **I_, int **J_);
 
 #include <stdio.h>
 #include <string.h>
@@ -134,108 +134,101 @@ int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_,
 #include <ctype.h>
 
 int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_,
-                double **val_, int **I_, int **J_)
-{
+        double **val_, int **I_, int **J_) {
     FILE *f;
     MM_typecode matcode;
     int M, N, nz;
     int i;
     double *val;
     int *I, *J;
- 
+
     if ((f = fopen(fname, "r")) == NULL)
-            return -1;
- 
- 
-    if (mm_read_banner(f, &matcode) != 0)
-    {
+        return -1;
+
+
+    if (mm_read_banner(f, &matcode) != 0) {
         printf("mm_read_unsymetric: Could not process Matrix Market banner ");
         printf(" in file [%s]\n", fname);
         return -1;
     }
- 
- 
- 
-    if ( !(mm_is_real(matcode) && mm_is_matrix(matcode) &&
-            mm_is_sparse(matcode)))
-    {
+
+
+
+    if (!(mm_is_real(matcode) && mm_is_matrix(matcode) &&
+            mm_is_sparse(matcode))) {
         fprintf(stderr, "Sorry, this application does not support ");
         fprintf(stderr, "Market Market type: [%s]\n",
                 mm_typecode_to_str(matcode));
         return -1;
     }
- 
+
     /* find out size of sparse matrix: M, N, nz .... */
- 
-    if (mm_read_mtx_crd_size(f, &M, &N, &nz) !=0)
-    {
+
+    if (mm_read_mtx_crd_size(f, &M, &N, &nz) != 0) {
         fprintf(stderr, "read_unsymmetric_sparse(): could not parse matrix size.\n");
         return -1;
     }
- 
+
     *M_ = M;
     *N_ = N;
     *nz_ = nz;
- 
+
     /* reseve memory for matrices */
- 
-    I = (int *) malloc(nz * sizeof(int));
-    J = (int *) malloc(nz * sizeof(int));
-    val = (double *) malloc(nz * sizeof(double));
- 
+
+    I = (int *) malloc(nz * sizeof (int));
+    J = (int *) malloc(nz * sizeof (int));
+    val = (double *) malloc(nz * sizeof (double));
+
     *val_ = val;
     *I_ = I;
     *J_ = J;
- 
+
     /* NOTE: when reading in doubles, ANSI C requires the use of the "l"  */
     /*   specifier as in "%lg", "%lf", "%le", otherwise errors will occur */
     /*  (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15)            */
- 
-    for (i=0; i<nz; i++)
-    {
+
+    for (i = 0; i < nz; i++) {
         fscanf(f, "%d %d %lg\n", &I[i], &J[i], &val[i]);
-        I[i]--;  /* adjust from 1-based to 0-based */
+        I[i]--; /* adjust from 1-based to 0-based */
         J[i]--;
     }
     fclose(f);
- 
+
     return 0;
 }
 
-int mm_is_valid(MM_typecode matcode)
-{
+int mm_is_valid(MM_typecode matcode) {
     if (!mm_is_matrix(matcode)) return 0;
     if (mm_is_dense(matcode) && mm_is_pattern(matcode)) return 0;
     if (mm_is_real(matcode) && mm_is_hermitian(matcode)) return 0;
-    if (mm_is_pattern(matcode) && (mm_is_hermitian(matcode) || 
-                mm_is_skew(matcode))) return 0;
+    if (mm_is_pattern(matcode) && (mm_is_hermitian(matcode) ||
+            mm_is_skew(matcode))) return 0;
     return 1;
 }
 
-int mm_read_banner(FILE *f, MM_typecode *matcode)
-{
+int mm_read_banner(FILE *f, MM_typecode *matcode) {
     char line[MM_MAX_LINE_LENGTH];
     char banner[MM_MAX_TOKEN_LENGTH];
-    char mtx[MM_MAX_TOKEN_LENGTH]; 
+    char mtx[MM_MAX_TOKEN_LENGTH];
     char crd[MM_MAX_TOKEN_LENGTH];
     char data_type[MM_MAX_TOKEN_LENGTH];
     char storage_scheme[MM_MAX_TOKEN_LENGTH];
     char *p;
 
 
-    mm_clear_typecode(matcode);  
+    mm_clear_typecode(matcode);
 
-    if (fgets(line, MM_MAX_LINE_LENGTH, f) == NULL) 
+    if (fgets(line, MM_MAX_LINE_LENGTH, f) == NULL)
         return MM_PREMATURE_EOF;
 
-    if (sscanf(line, "%s %s %s %s %s", banner, mtx, crd, data_type, 
-        storage_scheme) != 5)
+    if (sscanf(line, "%s %s %s %s %s", banner, mtx, crd, data_type,
+            storage_scheme) != 5)
         return MM_PREMATURE_EOF;
 
-    for (p=mtx; *p!='\0'; *p=tolower(*p),p++);  /* convert to lower case */
-    for (p=crd; *p!='\0'; *p=tolower(*p),p++);  
-    for (p=data_type; *p!='\0'; *p=tolower(*p),p++);
-    for (p=storage_scheme; *p!='\0'; *p=tolower(*p),p++);
+    for (p = mtx; *p != '\0'; *p = tolower(*p), p++); /* convert to lower case */
+    for (p = crd; *p != '\0'; *p = tolower(*p), p++);
+    for (p = data_type; *p != '\0'; *p = tolower(*p), p++);
+    for (p = storage_scheme; *p != '\0'; *p = tolower(*p), p++);
 
     /* check for banner */
     if (strncmp(banner, MatrixMarketBanner, strlen(MatrixMarketBanner)) != 0)
@@ -243,7 +236,7 @@ int mm_read_banner(FILE *f, MM_typecode *matcode)
 
     /* first field should be "mtx" */
     if (strcmp(mtx, MM_MTX_STR) != 0)
-        return  MM_UNSUPPORTED_TYPE;
+        return MM_UNSUPPORTED_TYPE;
     mm_set_matrix(matcode);
 
 
@@ -254,59 +247,57 @@ int mm_read_banner(FILE *f, MM_typecode *matcode)
     if (strcmp(crd, MM_SPARSE_STR) == 0)
         mm_set_sparse(matcode);
     else
-    if (strcmp(crd, MM_DENSE_STR) == 0)
-            mm_set_dense(matcode);
+        if (strcmp(crd, MM_DENSE_STR) == 0)
+        mm_set_dense(matcode);
     else
         return MM_UNSUPPORTED_TYPE;
-    
+
 
     /* third field */
 
     if (strcmp(data_type, MM_REAL_STR) == 0)
         mm_set_real(matcode);
     else
-    if (strcmp(data_type, MM_COMPLEX_STR) == 0)
+        if (strcmp(data_type, MM_COMPLEX_STR) == 0)
         mm_set_complex(matcode);
     else
-    if (strcmp(data_type, MM_PATTERN_STR) == 0)
+        if (strcmp(data_type, MM_PATTERN_STR) == 0)
         mm_set_pattern(matcode);
     else
-    if (strcmp(data_type, MM_INT_STR) == 0)
+        if (strcmp(data_type, MM_INT_STR) == 0)
         mm_set_integer(matcode);
     else
         return MM_UNSUPPORTED_TYPE;
-    
+
 
     /* fourth field */
 
     if (strcmp(storage_scheme, MM_GENERAL_STR) == 0)
         mm_set_general(matcode);
     else
-    if (strcmp(storage_scheme, MM_SYMM_STR) == 0)
+        if (strcmp(storage_scheme, MM_SYMM_STR) == 0)
         mm_set_symmetric(matcode);
     else
-    if (strcmp(storage_scheme, MM_HERM_STR) == 0)
+        if (strcmp(storage_scheme, MM_HERM_STR) == 0)
         mm_set_hermitian(matcode);
     else
-    if (strcmp(storage_scheme, MM_SKEW_STR) == 0)
+        if (strcmp(storage_scheme, MM_SKEW_STR) == 0)
         mm_set_skew(matcode);
     else
         return MM_UNSUPPORTED_TYPE;
-        
+
 
     return 0;
 }
 
-int mm_write_mtx_crd_size(FILE *f, int M, int N, int nz)
-{
+int mm_write_mtx_crd_size(FILE *f, int M, int N, int nz) {
     if (fprintf(f, "%d %d %d\n", M, N, nz) != 3)
         return MM_COULD_NOT_WRITE_FILE;
-    else 
+    else
         return 0;
 }
 
-int mm_read_mtx_crd_size(FILE *f, int *M, int *N, int *nz )
-{
+int mm_read_mtx_crd_size(FILE *f, int *M, int *N, int *nz) {
     char line[MM_MAX_LINE_LENGTH];
     int num_items_read;
 
@@ -314,62 +305,53 @@ int mm_read_mtx_crd_size(FILE *f, int *M, int *N, int *nz )
     *M = *N = *nz = 0;
 
     /* now continue scanning until you reach the end-of-comments */
-    do 
-    {
-        if (fgets(line,MM_MAX_LINE_LENGTH,f) == NULL) 
+    do {
+        if (fgets(line, MM_MAX_LINE_LENGTH, f) == NULL)
             return MM_PREMATURE_EOF;
-    }while (line[0] == '%');
+    } while (line[0] == '%');
 
     /* line[] is either blank or has M,N, nz */
     if (sscanf(line, "%d %d %d", M, N, nz) == 3)
         return 0;
-        
+
     else
-    do
-    { 
-        num_items_read = fscanf(f, "%d %d %d", M, N, nz); 
-        if (num_items_read == EOF) return MM_PREMATURE_EOF;
-    }
-    while (num_items_read != 3);
+        do {
+            num_items_read = fscanf(f, "%d %d %d", M, N, nz);
+            if (num_items_read == EOF) return MM_PREMATURE_EOF;
+        } while (num_items_read != 3);
 
     return 0;
 }
 
-
-int mm_read_mtx_array_size(FILE *f, int *M, int *N)
-{
+int mm_read_mtx_array_size(FILE *f, int *M, int *N) {
     char line[MM_MAX_LINE_LENGTH];
     int num_items_read;
     /* set return null parameter values, in case we exit with errors */
     *M = *N = 0;
-	
+
     /* now continue scanning until you reach the end-of-comments */
-    do 
-    {
-        if (fgets(line,MM_MAX_LINE_LENGTH,f) == NULL) 
+    do {
+        if (fgets(line, MM_MAX_LINE_LENGTH, f) == NULL)
             return MM_PREMATURE_EOF;
-    }while (line[0] == '%');
+    } while (line[0] == '%');
 
     /* line[] is either blank or has M,N, nz */
     if (sscanf(line, "%d %d", M, N) == 2)
         return 0;
-        
+
     else /* we have a blank line */
-    do
-    { 
-        num_items_read = fscanf(f, "%d %d", M, N); 
-        if (num_items_read == EOF) return MM_PREMATURE_EOF;
-    }
-    while (num_items_read != 2);
+        do {
+            num_items_read = fscanf(f, "%d %d", M, N);
+            if (num_items_read == EOF) return MM_PREMATURE_EOF;
+        } while (num_items_read != 2);
 
     return 0;
 }
 
-int mm_write_mtx_array_size(FILE *f, int M, int N)
-{
+int mm_write_mtx_array_size(FILE *f, int M, int N) {
     if (fprintf(f, "%d %d\n", M, N) != 2)
         return MM_COULD_NOT_WRITE_FILE;
-    else 
+    else
         return 0;
 }
 
@@ -379,67 +361,50 @@ int mm_write_mtx_array_size(FILE *f, int M, int N)
 
 /******************************************************************/
 /* use when I[], J[], and val[]J, and val[] are already allocated */
+
 /******************************************************************/
 
 int mm_read_mtx_crd_data(FILE *f, int M, int N, int nz, int I[], int J[],
-        double val[], MM_typecode matcode)
-{
+        double val[], MM_typecode matcode) {
     int i;
-    if (mm_is_complex(matcode))
-    {
-        for (i=0; i<nz; i++)
-            if (fscanf(f, "%d %d %lg %lg", &I[i], &J[i], &val[2*i], &val[2*i+1])
-                != 4) return MM_PREMATURE_EOF;
-    }
-    else if (mm_is_real(matcode))
-    {
-        for (i=0; i<nz; i++)
-        {
+    if (mm_is_complex(matcode)) {
+        for (i = 0; i < nz; i++)
+            if (fscanf(f, "%d %d %lg %lg", &I[i], &J[i], &val[2 * i], &val[2 * i + 1])
+                    != 4) return MM_PREMATURE_EOF;
+    } else if (mm_is_real(matcode)) {
+        for (i = 0; i < nz; i++) {
             if (fscanf(f, "%d %d %lg\n", &I[i], &J[i], &val[i])
-                != 3) return MM_PREMATURE_EOF;
+                    != 3) return MM_PREMATURE_EOF;
 
         }
-    }
-
-    else if (mm_is_pattern(matcode))
-    {
-        for (i=0; i<nz; i++)
+    } else if (mm_is_pattern(matcode)) {
+        for (i = 0; i < nz; i++)
             if (fscanf(f, "%d %d", &I[i], &J[i])
-                != 2) return MM_PREMATURE_EOF;
-    }
-    else
+                    != 2) return MM_PREMATURE_EOF;
+    } else
         return MM_UNSUPPORTED_TYPE;
 
     return 0;
-        
+
 }
 
 int mm_read_mtx_crd_entry(FILE *f, int *I, int *J,
-        double *real, double *imag, MM_typecode matcode)
-{
-    if (mm_is_complex(matcode))
-    {
-            if (fscanf(f, "%d %d %lg %lg", I, J, real, imag)
+        double *real, double *imag, MM_typecode matcode) {
+    if (mm_is_complex(matcode)) {
+        if (fscanf(f, "%d %d %lg %lg", I, J, real, imag)
                 != 4) return MM_PREMATURE_EOF;
-    }
-    else if (mm_is_real(matcode))
-    {
-            if (fscanf(f, "%d %d %lg\n", I, J, real)
+    } else if (mm_is_real(matcode)) {
+        if (fscanf(f, "%d %d %lg\n", I, J, real)
                 != 3) return MM_PREMATURE_EOF;
 
-    }
-
-    else if (mm_is_pattern(matcode))
-    {
-            if (fscanf(f, "%d %d", I, J) != 2) return MM_PREMATURE_EOF;
-    }
-    else
+    } else if (mm_is_pattern(matcode)) {
+        if (fscanf(f, "%d %d", I, J) != 2) return MM_PREMATURE_EOF;
+    } else
         return MM_UNSUPPORTED_TYPE;
 
     return 0;
-        
-}
 
+}
 
 /************************************************************************
     mm_read_mtx_crd()  fills M, N, nz, array of values, and return
@@ -447,24 +412,23 @@ int mm_read_mtx_crd_entry(FILE *f, int *I, int *J,
 
                         if matrix is complex, values[] is of size 2*nz,
                             (nz pairs of real/imaginary values)
-************************************************************************/
+ ************************************************************************/
 
-int mm_read_mtx_crd(char *fname, int *M, int *N, int *nz, int **I, int **J, 
-        double **val, MM_typecode *matcode)
-{
+int mm_read_mtx_crd(char *fname, int *M, int *N, int *nz, int **I, int **J,
+        double **val, MM_typecode *matcode) {
     int ret_code;
     FILE *f;
 
-    if (strcmp(fname, "stdin") == 0) f=stdin;
+    if (strcmp(fname, "stdin") == 0) f = stdin;
     else
-    if ((f = fopen(fname, "r")) == NULL)
+        if ((f = fopen(fname, "r")) == NULL)
         return MM_COULD_NOT_READ_FILE;
 
 
     if ((ret_code = mm_read_banner(f, matcode)) != 0)
         return ret_code;
 
-    if (!(mm_is_valid(*matcode) && mm_is_sparse(*matcode) && 
+    if (!(mm_is_valid(*matcode) && mm_is_sparse(*matcode) &&
             mm_is_matrix(*matcode)))
         return MM_UNSUPPORTED_TYPE;
 
@@ -472,28 +436,22 @@ int mm_read_mtx_crd(char *fname, int *M, int *N, int *nz, int **I, int **J,
         return ret_code;
 
 
-    *I = (int *)  malloc(*nz * sizeof(int));
-    *J = (int *)  malloc(*nz * sizeof(int));
+    *I = (int *) malloc(*nz * sizeof (int));
+    *J = (int *) malloc(*nz * sizeof (int));
     *val = NULL;
 
-    if (mm_is_complex(*matcode))
-    {
-        *val = (double *) malloc(*nz * 2 * sizeof(double));
-        ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val, 
+    if (mm_is_complex(*matcode)) {
+        *val = (double *) malloc(*nz * 2 * sizeof (double));
+        ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val,
                 *matcode);
         if (ret_code != 0) return ret_code;
-    }
-    else if (mm_is_real(*matcode))
-    {
-        *val = (double *) malloc(*nz * sizeof(double));
-        ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val, 
+    } else if (mm_is_real(*matcode)) {
+        *val = (double *) malloc(*nz * sizeof (double));
+        ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val,
                 *matcode);
         if (ret_code != 0) return ret_code;
-    }
-
-    else if (mm_is_pattern(*matcode))
-    {
-        ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val, 
+    } else if (mm_is_pattern(*matcode)) {
+        ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val,
                 *matcode);
         if (ret_code != 0) return ret_code;
     }
@@ -502,31 +460,29 @@ int mm_read_mtx_crd(char *fname, int *M, int *N, int *nz, int **I, int **J,
     return 0;
 }
 
-int mm_write_banner(FILE *f, MM_typecode matcode)
-{
+int mm_write_banner(FILE *f, MM_typecode matcode) {
     char *str = mm_typecode_to_str(matcode);
     int ret_code;
 
     ret_code = fprintf(f, "%s %s\n", MatrixMarketBanner, str);
     free(str);
-    if (ret_code !=2 )
+    if (ret_code != 2)
         return MM_COULD_NOT_WRITE_FILE;
     else
         return 0;
 }
 
 int mm_write_mtx_crd(char fname[], int M, int N, int nz, int I[], int J[],
-        double val[], MM_typecode matcode)
-{
+        double val[], MM_typecode matcode) {
     FILE *f;
     int i;
 
-    if (strcmp(fname, "stdout") == 0) 
+    if (strcmp(fname, "stdout") == 0)
         f = stdout;
     else
-    if ((f = fopen(fname, "w")) == NULL)
+        if ((f = fopen(fname, "w")) == NULL)
         return MM_COULD_NOT_WRITE_FILE;
-    
+
     /* print banner followed by typecode */
     fprintf(f, "%s ", MatrixMarketBanner);
     fprintf(f, "%s\n", mm_typecode_to_str(matcode));
@@ -536,59 +492,55 @@ int mm_write_mtx_crd(char fname[], int M, int N, int nz, int I[], int J[],
 
     /* print values */
     if (mm_is_pattern(matcode))
-        for (i=0; i<nz; i++)
+        for (i = 0; i < nz; i++)
             fprintf(f, "%d %d\n", I[i], J[i]);
     else
-    if (mm_is_real(matcode))
-        for (i=0; i<nz; i++)
+        if (mm_is_real(matcode))
+        for (i = 0; i < nz; i++)
             fprintf(f, "%d %d %20.16g\n", I[i], J[i], val[i]);
     else
-    if (mm_is_complex(matcode))
-        for (i=0; i<nz; i++)
-            fprintf(f, "%d %d %20.16g %20.16g\n", I[i], J[i], val[2*i], 
-                        val[2*i+1]);
-    else
-    {
+        if (mm_is_complex(matcode))
+        for (i = 0; i < nz; i++)
+            fprintf(f, "%d %d %20.16g %20.16g\n", I[i], J[i], val[2 * i],
+                val[2 * i + 1]);
+    else {
         if (f != stdout) fclose(f);
         return MM_UNSUPPORTED_TYPE;
     }
 
-    if (f !=stdout) fclose(f);
+    if (f != stdout) fclose(f);
 
     return 0;
 }
-  
 
 /**
-*  Create a new copy of a string s.  mm_strdup() is a common routine, but
-*  not part of ANSI C, so it is included here.  Used by mm_typecode_to_str().
-*
-*/
-char *mm_strdup(const char *s)
-{
-	int len = strlen(s);
-	char *s2 = (char *) malloc((len+1)*sizeof(char));
-	return strcpy(s2, s);
+ *  Create a new copy of a string s.  mm_strdup() is a common routine, but
+ *  not part of ANSI C, so it is included here.  Used by mm_typecode_to_str().
+ *
+ */
+char *mm_strdup(const char *s) {
+    int len = strlen(s);
+    char *s2 = (char *) malloc((len + 1) * sizeof (char));
+    return strcpy(s2, s);
 }
 
-char  *mm_typecode_to_str(MM_typecode matcode)
-{
+char *mm_typecode_to_str(MM_typecode matcode) {
     char buffer[MM_MAX_LINE_LENGTH];
     char *types[4];
-	char *mm_strdup(const char *);
-    int error =0;
+    char *mm_strdup(const char *);
+    int error = 0;
 
     /* check for MTX type */
-    if (mm_is_matrix(matcode)) 
+    if (mm_is_matrix(matcode))
         types[0] = MM_MTX_STR;
     else
-        error=1;
+        error = 1;
 
     /* check for CRD or ARR matrix */
     if (mm_is_sparse(matcode))
         types[1] = MM_SPARSE_STR;
     else
-    if (mm_is_dense(matcode))
+        if (mm_is_dense(matcode))
         types[1] = MM_DENSE_STR;
     else
         return NULL;
@@ -597,13 +549,13 @@ char  *mm_typecode_to_str(MM_typecode matcode)
     if (mm_is_real(matcode))
         types[2] = MM_REAL_STR;
     else
-    if (mm_is_complex(matcode))
+        if (mm_is_complex(matcode))
         types[2] = MM_COMPLEX_STR;
     else
-    if (mm_is_pattern(matcode))
+        if (mm_is_pattern(matcode))
         types[2] = MM_PATTERN_STR;
     else
-    if (mm_is_integer(matcode))
+        if (mm_is_integer(matcode))
         types[2] = MM_INT_STR;
     else
         return NULL;
@@ -613,81 +565,86 @@ char  *mm_typecode_to_str(MM_typecode matcode)
     if (mm_is_general(matcode))
         types[3] = MM_GENERAL_STR;
     else
-    if (mm_is_symmetric(matcode))
+        if (mm_is_symmetric(matcode))
         types[3] = MM_SYMM_STR;
-    else 
-    if (mm_is_hermitian(matcode))
+    else
+        if (mm_is_hermitian(matcode))
         types[3] = MM_HERM_STR;
-    else 
-    if (mm_is_skew(matcode))
+    else
+        if (mm_is_skew(matcode))
         types[3] = MM_SKEW_STR;
     else
         return NULL;
 
-    sprintf(buffer,"%s %s %s %s", types[0], types[1], types[2], types[3]);
+    sprintf(buffer, "%s %s %s %s", types[0], types[1], types[2], types[3]);
     return mm_strdup(buffer);
 
 }
+
 /** Read Matrix Market file into COO matrix */
-void read_mm(char* strpath, int* pM, int* pN, int* prealnnz, int** pI, int** pJ, double** pval){ /*{{{*/
-/* 
-*   taken from Matrix Market I/O library for ANSI C
-*
-*   See http://math.nist.gov/MatrixMarket for details.
-*
-*
-*/
-
-int i, M, N, nz, *I, *J;
-double* val;
-int ret_code;
-MM_typecode matcode;
-FILE* f;
-if ((f = fopen(strpath, "r")) == NULL) {fprintf(stderr, "Input matrix file %s cannot be opened to read.", strpath);exit(1);}
-/* READ MATRIX */
-if (mm_read_banner(f, &matcode) != 0) {
-    printf("Could not process Matrix Market banner.\n");    exit(1);
-}
-/*  This is how one can screen matrix types if their application */
-/*  only supports a subset of the Matrix Market data types.      */
-if (mm_is_complex(matcode) && mm_is_matrix(matcode) && mm_is_sparse(matcode) ) {
-    printf("Sorry, this application does not support ");
-    printf("Market Market type: [%s]\n", mm_typecode_to_str(matcode));exit(1);
-}
-/* find out size of sparse matrix .... */
-if ((ret_code = mm_read_mtx_crd_size(f, &M, &N, &nz)) !=0) exit(1);
-/* reseve memory for matrices */
-I = (int *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(int));
-J = (int *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(int));
-val = (double *) malloc((mm_is_symmetric(matcode) ? 2*nz : nz) * sizeof(double));
-*pI = I;
-*pJ = J;
-*pval = val;
-/* NOTE: when reading in doubles, ANSI C requires the use of the "l"  */
-/*   specifier as in "%lg", "%lf", "%le", otherwise errors will occur */
-/*  (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15)            */
-int realnnz = 0;
-for (i=0; i<nz; i++) {
-    if(mm_is_pattern(matcode)) {
-        fscanf(f, "%d %d\n", &I[realnnz], &J[realnnz]);
-        val[realnnz] = 1.0;
+void read_mm(char* strpath, int* pM, int* pN, int* prealnnz, int** pI, int** pJ, double** pval) { /*{{{*/
+    /*
+     *   taken from Matrix Market I/O library for ANSI C
+     *
+     *   See http://math.nist.gov/MatrixMarket for details.
+     *
+     *
+     */
+
+    int i, M, N, nz, *I, *J;
+    double* val;
+    int ret_code;
+    MM_typecode matcode;
+    FILE* f;
+    if ((f = fopen(strpath, "r")) == NULL) {
+        fprintf(stderr, "Input matrix file %s cannot be opened to read.", strpath);
+        exit(1);
     }
-    else
-        fscanf(f, "%d %d %lg\n", &I[realnnz], &J[realnnz], &val[realnnz]);
-    I[realnnz]--;  /* adjust from 1-based to 0-based */
+    /* READ MATRIX */
+    if (mm_read_banner(f, &matcode) != 0) {
+        printf("Could not process Matrix Market banner.\n");
+        exit(1);
+    }
+    /*  This is how one can screen matrix types if their application */
+    /*  only supports a subset of the Matrix Market data types.      */
+    if (mm_is_complex(matcode) && mm_is_matrix(matcode) && mm_is_sparse(matcode)) {
+        printf("Sorry, this application does not support ");
+        printf("Market Market type: [%s]\n", mm_typecode_to_str(matcode));
+        exit(1);
+    }
+    /* find out size of sparse matrix .... */
+    if ((ret_code = mm_read_mtx_crd_size(f, &M, &N, &nz)) != 0) exit(1);
+    /* reseve memory for matrices */
+    I = (int *) malloc((mm_is_symmetric(matcode) ? 2 * nz : nz) * sizeof (int));
+    J = (int *) malloc((mm_is_symmetric(matcode) ? 2 * nz : nz) * sizeof (int));
+    val = (double *) malloc((mm_is_symmetric(matcode) ? 2 * nz : nz) * sizeof (double));
+    *pI = I;
+    *pJ = J;
+    *pval = val;
+    /* NOTE: when reading in doubles, ANSI C requires the use of the "l"  */
+    /*   specifier as in "%lg", "%lf", "%le", otherwise errors will occur */
+    /*  (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15)            */
+    int realnnz = 0;
+    for (i = 0; i < nz; i++) {
+        if (mm_is_pattern(matcode)) {
+            fscanf(f, "%d %d\n", &I[realnnz], &J[realnnz]);
+            val[realnnz] = 1.0;
+        } else
+            fscanf(f, "%d %d %lg\n", &I[realnnz], &J[realnnz], &val[realnnz]);
+        I[realnnz]--; /* adjust from 1-based to 0-based */
         J[realnnz]--;
-    if(mm_is_symmetric(matcode) && I[realnnz] != J[realnnz]) {
-        I[realnnz+1] = J[realnnz];
-        J[realnnz+1] = I[realnnz];
-        val[realnnz+1] = val[realnnz];
+        if (mm_is_symmetric(matcode) && I[realnnz] != J[realnnz]) {
+            I[realnnz + 1] = J[realnnz];
+            J[realnnz + 1] = I[realnnz];
+            val[realnnz + 1] = val[realnnz];
+            realnnz++;
+        }
         realnnz++;
     }
-    realnnz++;
-}
-if (f !=stdin) fclose(f);
-*pM = M;
-*pN = N;
-*prealnnz = realnnz;
+    if (f != stdin) fclose(f);
+    *pM = M;
+    *pN = N;
+    *prealnnz = realnnz;
 } /* ENDOF read_mm }}}*/
 
 #endif
diff --git a/common/util.h b/common/util.h
index f221d117396a28e24c020e322a34a7d70aff7c30..93095188dfcdd2637110f338ecfc2bcccd3ebe44 100644
--- a/common/util.h
+++ b/common/util.h
@@ -8,28 +8,28 @@ MIC_TARGET int option_print_matrices = 0;
 /** Converts COO matrix to CSR matrix */
 void coo_to_csr(int m, int nnz, int* I, int* J, double* val, MKL_INT* AI, MKL_INT* AJ, double* Aval) { /*{{{*/
     MKL_INT info = 0;
-    MKL_INT job[8];   
-    job[1]=1; // one based indexing in csr
-    
-    job[2]=0; // zero based indexing in coo
-    job[3]=2; // I don't know
-    job[4]=nnz; // nnz
-    
-    job[0]=1;  // coo to csr
-    job[5]=0;  // Acsr and AJR allocated by user
-    mkl_dcsrcoo (job,&m, Aval, AJ, AI, &nnz, val, I, J, &info);
+    MKL_INT job[8];
+    job[1] = 1; // one based indexing in csr
+
+    job[2] = 0; // zero based indexing in coo
+    job[3] = 2; // I don't know
+    job[4] = nnz; // nnz
+
+    job[0] = 1; // coo to csr
+    job[5] = 0; // Acsr and AJR allocated by user
+    mkl_dcsrcoo(job, &m, Aval, AJ, AI, &nnz, val, I, J, &info);
 } /* ENDOF coo_to_csr }}}*/
 
 /** Prints matrix in CSR format */
-void MIC_TARGET printmm_one(int m, double* Aval, int* AJ, int* AI){ //{{{
+void MIC_TARGET printmm_one(int m, double* Aval, int* AJ, int* AI) { //{{{
 
     if (option_print_matrices == OPTION_NOPRINT_MATRICES)
         return;
     int i;
-    for(i = 0; i < m; i++) {
-        printf("%d: ", i+1);
+    for (i = 0; i < m; i++) {
+        printf("%d: ", i + 1);
         int j;
-        for(j = AI[i]-1; j < AI[i+1]-1; j++) {
+        for (j = AI[i] - 1; j < AI[i + 1] - 1; j++) {
             printf("%d:%g  ", AJ[j], Aval[j]);
         }
         printf("\n");
@@ -38,20 +38,20 @@ void MIC_TARGET printmm_one(int m, double* Aval, int* AJ, int* AI){ //{{{
 }//}}}
 
 /** Writes matrix in CSR format in to a file using Matrix Market format */
-void MIC_TARGET printfilemm_one(char* file, int m, int n, double* Aval, int* AJ, int* AI){//{{{
+void MIC_TARGET printfilemm_one(char* file, int m, int n, double* Aval, int* AJ, int* AI) {//{{{
 
     FILE* f = fopen(file, "w");
-    if(f == NULL){
+    if (f == NULL) {
         printf("%s %s %d: %s cannot be opened to write matrix\n", __FILE__, __PRETTY_FUNCTION__, __LINE__, file);
-        exit(1); 
+        exit(1);
     }
     int i;
     fprintf(f, "%%%%MatrixMarket matrix coordinate real general\n");
-    fprintf(f, "%d %d %d\n", m, n, AI[m]-1);
-    for(i = 0; i < m; i++) {
+    fprintf(f, "%d %d %d\n", m, n, AI[m] - 1);
+    for (i = 0; i < m; i++) {
         int j;
-        for(j = AI[i]-1; j < AI[i+1]-1; j++) {
-            fprintf(f, "%d %d %g\n", i+1, AJ[j], Aval[j]);
+        for (j = AI[i] - 1; j < AI[i + 1] - 1; j++) {
+            fprintf(f, "%d %d %g\n", i + 1, AJ[j], Aval[j]);
         }
     }
     fclose(f);
diff --git a/krylov/ksp_solver_multi_rhs.c b/krylov/ksp_solver_multi_rhs.c
index 8b6569580035f5f7150272de1cdef562740f5211..7da70b7d30117aefc385d4fe27d88fe7bbe74f39 100644
--- a/krylov/ksp_solver_multi_rhs.c
+++ b/krylov/ksp_solver_multi_rhs.c
@@ -7,9 +7,9 @@ All rights reserved.
 Redistribution and use in source and binary forms, with or without modification,
 are permitted provided that the following conditions are met:
 
-* Redistributions of source code must retain the above copyright notice, this
+ * Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.
-* Redistributions in binary form must reproduce the above copyright notice, this
+ * Redistributions in binary form must reproduce the above copyright notice, this
   list of conditions and the following disclaimer in the documentation and/or
   other materials provided with the distribution.
 
@@ -23,7 +23,7 @@ are permitted provided that the following conditions are met:
   ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+ */
 
 static char help[] = "Solves a sequence of linear systems with different right-hand-side vectors.\n\
 Input parameters include:\n\
@@ -46,176 +46,232 @@ T*/
      petscmat.h - matrices
      petscis.h     - index sets            petscksp.h - Krylov subspace methods
      petscviewer.h - viewers               petscpc.h  - preconditioners
-*/
+ */
 #include <petscksp.h>
 
 #undef __FUNCT__
 #define __FUNCT__ "main"
-int main(int argc,char **args)
-{
-  Vec            x,b,u;  /* approx solution, RHS, exact solution */
-  Mat            A;        /* linear system matrix */
-  KSP            ksp;     /* linear solver context */
-  PetscReal      norm;     /* norm of solution error */
-  PetscErrorCode ierr;
-  PetscInt       ntimes,i,j,k,Ii,J,Istart,Iend;
-  PetscInt       m   = 8,n = 7,its;
-  PetscBool      flg = PETSC_FALSE;
-  PetscScalar    v,one = 1.0,neg_one = -1.0,rhs;
-
-  PetscInitialize(&argc,&args,(char*)0,help);
-  ierr = PetscOptionsGetInt(NULL,"-m",&m,NULL);CHKERRQ(ierr);
-  ierr = PetscOptionsGetInt(NULL,"-n",&n,NULL);CHKERRQ(ierr);
-
-  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-         Compute the matrix for use in solving a series of
-         linear systems of the form, A x_i = b_i, for i=1,2,...
-     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
-  /*
-     Create parallel matrix, specifying only its global dimensions.
-     When using MatCreate(), the matrix format can be specified at
-     runtime. Also, the parallel partitioning of the matrix is
-     determined by PETSc at runtime.
-  */
-  ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
-  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);CHKERRQ(ierr);
-  ierr = MatSetFromOptions(A);CHKERRQ(ierr);
-  ierr = MatSetUp(A);CHKERRQ(ierr);
-
-  /*
-     Currently, all PETSc parallel matrix formats are partitioned by
-     contiguous chunks of rows across the processors.  Determine which
-     rows of the matrix are locally owned.
-  */
-  ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
-
-  /*
-     Set matrix elements for the 2-D, five-point stencil in parallel.
-      - Each processor needs to insert only elements that it owns
-        locally (but any non-local elements will be sent to the
-        appropriate processor during matrix assembly).
-      - Always specify global rows and columns of matrix entries.
-   */
-  for (Ii=Istart; Ii<Iend; Ii++) {
-    v = -1.0; i = Ii/n; j = Ii - i*n;
-    if (i>0)   {J = Ii - n; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
-    if (i<m-1) {J = Ii + n; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
-    if (j>0)   {J = Ii - 1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
-    if (j<n-1) {J = Ii + 1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
-    v = 4.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
-  }
-
-  /*
-     Assemble matrix, using the 2-step process:
-       MatAssemblyBegin(), MatAssemblyEnd()
-     Computations can be done while messages are in transition
-     by placing code between these two statements.
-  */
-  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
-  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
-
-  /*
-     Create parallel vectors.
-      - When using VecCreate(), VecSetSizes() and VecSetFromOptions(),
-        we specify only the vector's global
-        dimension; the parallel partitioning is determined at runtime.
-      - When solving a linear system, the vectors and matrices MUST
-        be partitioned accordingly.  PETSc automatically generates
-        appropriately partitioned matrices and vectors when MatCreate()
-        and VecCreate() are used with the same communicator.
-      - Note: We form 1 vector from scratch and then duplicate as needed.
-  */
-  ierr = VecCreate(PETSC_COMM_WORLD,&u);CHKERRQ(ierr);
-  ierr = VecSetSizes(u,PETSC_DECIDE,m*n);CHKERRQ(ierr);
-  ierr = VecSetFromOptions(u);CHKERRQ(ierr);
-  ierr = VecDuplicate(u,&b);CHKERRQ(ierr);
-  ierr = VecDuplicate(b,&x);CHKERRQ(ierr);
-
-  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-                Create the linear solver and set various options
-     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
-
-  /*
-     Create linear solver context
-  */
-  ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
-
-  /*
-     Set operators. Here the matrix that defines the linear system
-     also serves as the preconditioning matrix.
-  */
-  ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
-
-  /*
-    Set runtime options, e.g.,
-        -ksp_type <type> -pc_type <type> -ksp_monitor -ksp_rtol <rtol>
-    These options will override those specified above as long as
-    KSPSetFromOptions() is called _after_ any other customization
-    routines.
-  */
-  ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
-
-  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-       Solve several linear systems of the form  A x_i = b_i
-       I.e., we retain the same matrix (A) for all systems, but
-       change the right-hand-side vector (b_i) at each step.
-
-       In this case, we simply call KSPSolve() multiple times.  The
-       preconditioner setup operations (e.g., factorization for ILU)
-       be done during the first call to KSPSolve() only; such operations
-       will NOT be repeated for successive solves.
-     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
-
-  ntimes = 2;
-  ierr   = PetscOptionsGetInt(NULL,"-ntimes",&ntimes,NULL);CHKERRQ(ierr);
-  for (k=1; k<ntimes+1; k++) {
+
+int main(int argc, char **args) {
+    Vec x, b, u; /* approx solution, RHS, exact solution */
+    Mat A; /* linear system matrix */
+    KSP ksp; /* linear solver context */
+    PetscReal norm; /* norm of solution error */
+    PetscErrorCode ierr;
+    PetscInt ntimes, i, j, k, Ii, J, Istart, Iend;
+    PetscInt m = 8, n = 7, its;
+    PetscBool flg = PETSC_FALSE;
+    PetscScalar v, one = 1.0, neg_one = -1.0, rhs;
+
+    PetscInitialize(&argc, &args, (char*) 0, help);
+    ierr = PetscOptionsGetInt(NULL, "-m", &m, NULL);
+    CHKERRQ(ierr);
+    ierr = PetscOptionsGetInt(NULL, "-n", &n, NULL);
+    CHKERRQ(ierr);
+
+    /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+           Compute the matrix for use in solving a series of
+           linear systems of the form, A x_i = b_i, for i=1,2,...
+       - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+    /*
+       Create parallel matrix, specifying only its global dimensions.
+       When using MatCreate(), the matrix format can be specified at
+       runtime. Also, the parallel partitioning of the matrix is
+       determined by PETSc at runtime.
+     */
+    ierr = MatCreate(PETSC_COMM_WORLD, &A);
+    CHKERRQ(ierr);
+    ierr = MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, m*n, m * n);
+    CHKERRQ(ierr);
+    ierr = MatSetFromOptions(A);
+    CHKERRQ(ierr);
+    ierr = MatSetUp(A);
+    CHKERRQ(ierr);
+
+    /*
+       Currently, all PETSc parallel matrix formats are partitioned by
+       contiguous chunks of rows across the processors.  Determine which
+       rows of the matrix are locally owned.
+     */
+    ierr = MatGetOwnershipRange(A, &Istart, &Iend);
+    CHKERRQ(ierr);
+
+    /*
+       Set matrix elements for the 2-D, five-point stencil in parallel.
+        - Each processor needs to insert only elements that it owns
+          locally (but any non-local elements will be sent to the
+          appropriate processor during matrix assembly).
+        - Always specify global rows and columns of matrix entries.
+     */
+    for (Ii = Istart; Ii < Iend; Ii++) {
+        v = -1.0;
+        i = Ii / n;
+        j = Ii - i*n;
+        if (i > 0) {
+            J = Ii - n;
+            ierr = MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
+            CHKERRQ(ierr);
+        }
+        if (i < m - 1) {
+            J = Ii + n;
+            ierr = MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
+            CHKERRQ(ierr);
+        }
+        if (j > 0) {
+            J = Ii - 1;
+            ierr = MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
+            CHKERRQ(ierr);
+        }
+        if (j < n - 1) {
+            J = Ii + 1;
+            ierr = MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
+            CHKERRQ(ierr);
+        }
+        v = 4.0;
+        ierr = MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES);
+        CHKERRQ(ierr);
+    }
+
+    /*
+       Assemble matrix, using the 2-step process:
+         MatAssemblyBegin(), MatAssemblyEnd()
+       Computations can be done while messages are in transition
+       by placing code between these two statements.
+     */
+    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
+    CHKERRQ(ierr);
+    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
+    CHKERRQ(ierr);
+
+    /*
+       Create parallel vectors.
+        - When using VecCreate(), VecSetSizes() and VecSetFromOptions(),
+          we specify only the vector's global
+          dimension; the parallel partitioning is determined at runtime.
+        - When solving a linear system, the vectors and matrices MUST
+          be partitioned accordingly.  PETSc automatically generates
+          appropriately partitioned matrices and vectors when MatCreate()
+          and VecCreate() are used with the same communicator.
+        - Note: We form 1 vector from scratch and then duplicate as needed.
+     */
+    ierr = VecCreate(PETSC_COMM_WORLD, &u);
+    CHKERRQ(ierr);
+    ierr = VecSetSizes(u, PETSC_DECIDE, m * n);
+    CHKERRQ(ierr);
+    ierr = VecSetFromOptions(u);
+    CHKERRQ(ierr);
+    ierr = VecDuplicate(u, &b);
+    CHKERRQ(ierr);
+    ierr = VecDuplicate(b, &x);
+    CHKERRQ(ierr);
+
+    /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+                  Create the linear solver and set various options
+       - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+    /*
+       Create linear solver context
+     */
+    ierr = KSPCreate(PETSC_COMM_WORLD, &ksp);
+    CHKERRQ(ierr);
 
     /*
-       Set exact solution; then compute right-hand-side vector.  We use
-       an exact solution of a vector with all elements equal to 1.0*k.
-    */
-    rhs  = one * (PetscReal)k;
-    ierr = VecSet(u,rhs);CHKERRQ(ierr);
-    ierr = MatMult(A,u,b);CHKERRQ(ierr);
+       Set operators. Here the matrix that defines the linear system
+       also serves as the preconditioning matrix.
+     */
+    ierr = KSPSetOperators(ksp, A, A);
+    CHKERRQ(ierr);
 
     /*
-       View the exact solution vector if desired
-    */
-    ierr = PetscOptionsGetBool(NULL,"-view_exact_sol",&flg,NULL);CHKERRQ(ierr);
-    if (flg) {ierr = VecView(u,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);}
+      Set runtime options, e.g.,
+          -ksp_type <type> -pc_type <type> -ksp_monitor -ksp_rtol <rtol>
+      These options will override those specified above as long as
+      KSPSetFromOptions() is called _after_ any other customization
+      routines.
+     */
+    ierr = KSPSetFromOptions(ksp);
+    CHKERRQ(ierr);
+
+    /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+         Solve several linear systems of the form  A x_i = b_i
+         I.e., we retain the same matrix (A) for all systems, but
+         change the right-hand-side vector (b_i) at each step.
+
+         In this case, we simply call KSPSolve() multiple times.  The
+         preconditioner setup operations (e.g., factorization for ILU)
+         be done during the first call to KSPSolve() only; such operations
+         will NOT be repeated for successive solves.
+       - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 
-    ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
+    ntimes = 2;
+    ierr = PetscOptionsGetInt(NULL, "-ntimes", &ntimes, NULL);
+    CHKERRQ(ierr);
+    for (k = 1; k < ntimes + 1; k++) {
 
+        /*
+           Set exact solution; then compute right-hand-side vector.  We use
+           an exact solution of a vector with all elements equal to 1.0*k.
+         */
+        rhs = one * (PetscReal) k;
+        ierr = VecSet(u, rhs);
+        CHKERRQ(ierr);
+        ierr = MatMult(A, u, b);
+        CHKERRQ(ierr);
+
+        /*
+           View the exact solution vector if desired
+         */
+        ierr = PetscOptionsGetBool(NULL, "-view_exact_sol", &flg, NULL);
+        CHKERRQ(ierr);
+        if (flg) {
+            ierr = VecView(u, PETSC_VIEWER_STDOUT_WORLD);
+            CHKERRQ(ierr);
+        }
+
+        ierr = KSPSolve(ksp, b, x);
+        CHKERRQ(ierr);
+
+        /*
+           Check the error
+         */
+        ierr = VecAXPY(x, neg_one, u);
+        CHKERRQ(ierr);
+        ierr = VecNorm(x, NORM_2, &norm);
+        CHKERRQ(ierr);
+        ierr = KSPGetIterationNumber(ksp, &its);
+        CHKERRQ(ierr);
+        /*
+           Print convergence information.  PetscPrintf() produces a single
+           print statement from all processes that share a communicator.
+         */
+        ierr = PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g System %D: iterations %D\n", (double) norm, k, its);
+        CHKERRQ(ierr);
+    }
+
+    /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+                        Clean up
+       - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
     /*
-       Check the error
-    */
-    ierr = VecAXPY(x,neg_one,u);CHKERRQ(ierr);
-    ierr = VecNorm(x,NORM_2,&norm);CHKERRQ(ierr);
-    ierr = KSPGetIterationNumber(ksp,&its);CHKERRQ(ierr);
+       Free work space.  All PETSc objects should be destroyed when they
+       are no longer needed.
+     */
+    ierr = KSPDestroy(&ksp);
+    CHKERRQ(ierr);
+    ierr = VecDestroy(&u);
+    CHKERRQ(ierr);
+    ierr = VecDestroy(&x);
+    CHKERRQ(ierr);
+    ierr = VecDestroy(&b);
+    CHKERRQ(ierr);
+    ierr = MatDestroy(&A);
+    CHKERRQ(ierr);
+
     /*
-       Print convergence information.  PetscPrintf() produces a single
-       print statement from all processes that share a communicator.
-    */
-    ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g System %D: iterations %D\n",(double)norm,k,its);CHKERRQ(ierr);
-  }
-
-  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-                      Clean up
-     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
-  /*
-     Free work space.  All PETSc objects should be destroyed when they
-     are no longer needed.
-  */
-  ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
-  ierr = VecDestroy(&u);CHKERRQ(ierr);  ierr = VecDestroy(&x);CHKERRQ(ierr);
-  ierr = VecDestroy(&b);CHKERRQ(ierr);  ierr = MatDestroy(&A);CHKERRQ(ierr);
-
-  /*
-     Always call PetscFinalize() before exiting a program.  This routine
-       - finalizes the PETSc libraries as well as MPI
-       - provides summary and diagnostic information if certain runtime
-         options are chosen (e.g., -log_summary).
-  */
-  ierr = PetscFinalize();
-  return 0;
+       Always call PetscFinalize() before exiting a program.  This routine
+         - finalizes the PETSc libraries as well as MPI
+         - provides summary and diagnostic information if certain runtime
+           options are chosen (e.g., -log_summary).
+     */
+    ierr = PetscFinalize();
+    return 0;
 }
diff --git a/krylov/ksp_solver_simple.c b/krylov/ksp_solver_simple.c
index e8815f16a2d5679296bdea1b9d0c28757506cdcb..d1595c2d7a431549e27d32ccc47b9e5cb8a4e230 100644
--- a/krylov/ksp_solver_simple.c
+++ b/krylov/ksp_solver_simple.c
@@ -7,9 +7,9 @@ All rights reserved.
 Redistribution and use in source and binary forms, with or without modification,
 are permitted provided that the following conditions are met:
 
-* Redistributions of source code must retain the above copyright notice, this
+ * Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.
-* Redistributions in binary form must reproduce the above copyright notice, this
+ * Redistributions in binary form must reproduce the above copyright notice, this
   list of conditions and the following disclaimer in the documentation and/or
   other materials provided with the distribution.
 
@@ -23,7 +23,7 @@ are permitted provided that the following conditions are met:
   ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+ */
 
 static char help[] = "Solves a linear system in parallel with KSP.\n\
 Input parameters include:\n\
@@ -46,214 +46,280 @@ T*/
      petscmat.h - matrices
      petscis.h     - index sets            petscksp.h - Krylov subspace methods
      petscviewer.h - viewers               petscpc.h  - preconditioners
-*/
+ */
 #include <petscksp.h>
 
 #undef __FUNCT__
 #define __FUNCT__ "main"
-int main(int argc,char **args)
-{
-  Vec            x,b,u;  /* approx solution, RHS, exact solution */
-  Mat            A;        /* linear system matrix */
-  KSP            ksp;     /* linear solver context */
-  PetscRandom    rctx;     /* random number generator context */
-  PetscReal      norm;     /* norm of solution error */
-  PetscInt       i,j,Ii,J,Istart,Iend,m = 8,n = 7,its;
-  PetscErrorCode ierr;
-  PetscBool      flg = PETSC_FALSE;
-  PetscScalar    v;
+
+int main(int argc, char **args) {
+    Vec x, b, u; /* approx solution, RHS, exact solution */
+    Mat A; /* linear system matrix */
+    KSP ksp; /* linear solver context */
+    PetscRandom rctx; /* random number generator context */
+    PetscReal norm; /* norm of solution error */
+    PetscInt i, j, Ii, J, Istart, Iend, m = 8, n = 7, its;
+    PetscErrorCode ierr;
+    PetscBool flg = PETSC_FALSE;
+    PetscScalar v;
 #if defined(PETSC_USE_LOG)
-  PetscLogStage stage;
+    PetscLogStage stage;
 #endif
 
-  PetscInitialize(&argc,&args,(char*)0,help);
-  ierr = PetscOptionsGetInt(NULL,"-m",&m,NULL);CHKERRQ(ierr);
-  ierr = PetscOptionsGetInt(NULL,"-n",&n,NULL);CHKERRQ(ierr);
-  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-         Compute the matrix and right-hand-side vector that define
-         the linear system, Ax = b.
-     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
-  /*
-     Create parallel matrix, specifying only its global dimensions.
-     When using MatCreate(), the matrix format can be specified at
-     runtime. Also, the parallel partitioning of the matrix is
-     determined by PETSc at runtime.
-
-     Performance tuning note:  For problems of substantial size,
-     preallocation of matrix memory is crucial for attaining good
-     performance. See the matrix chapter of the users manual for details.
-  */
-  ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
-  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);CHKERRQ(ierr);
-  ierr = MatSetFromOptions(A);CHKERRQ(ierr);
-  ierr = MatMPIAIJSetPreallocation(A,5,NULL,5,NULL);CHKERRQ(ierr);
-  ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
-
-  /*
-     Currently, all PETSc parallel matrix formats are partitioned by
-     contiguous chunks of rows across the processors.  Determine which
-     rows of the matrix are locally owned.
-  */
-  ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
-
-  /*
-     Set matrix elements for the 2-D, five-point stencil in parallel.
-      - Each processor needs to insert only elements that it owns
-        locally (but any non-local elements will be sent to the
-        appropriate processor during matrix assembly).
-      - Always specify global rows and columns of matrix entries.
-
-     Note: this uses the less common natural ordering that orders first
-     all the unknowns for x = h then for x = 2h etc; Hence you see J = Ii +- n
-     instead of J = I +- m as you might expect. The more standard ordering
-     would first do all variables for y = h, then y = 2h etc.
-
-   */
-  ierr = PetscLogStageRegister("Assembly", &stage);CHKERRQ(ierr);
-  ierr = PetscLogStagePush(stage);CHKERRQ(ierr);
-  for (Ii=Istart; Ii<Iend; Ii++) {
-    v = -1.0; i = Ii/n; j = Ii - i*n;
-    if (i>0)   {J = Ii - n; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);}
-    if (i<m-1) {J = Ii + n; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);}
-    if (j>0)   {J = Ii - 1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);}
-    if (j<n-1) {J = Ii + 1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);}
-    v = 4.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,ADD_VALUES);CHKERRQ(ierr);
-  }
-
-  /*
-     Assemble matrix, using the 2-step process:
-       MatAssemblyBegin(), MatAssemblyEnd()
-     Computations can be done while messages are in transition
-     by placing code between these two statements.
-  */
-  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
-  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
-  ierr = PetscLogStagePop();CHKERRQ(ierr);
-
-  /* A is symmetric. Set symmetric flag to enable ICC/Cholesky preconditioner */
-  ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
-
-  /*
-     Create parallel vectors.
-      - We form 1 vector from scratch and then duplicate as needed.
-      - When using VecCreate(), VecSetSizes and VecSetFromOptions()
-        in this example, we specify only the
-        vector's global dimension; the parallel partitioning is determined
-        at runtime.
-      - When solving a linear system, the vectors and matrices MUST
-        be partitioned accordingly.  PETSc automatically generates
-        appropriately partitioned matrices and vectors when MatCreate()
-        and VecCreate() are used with the same communicator.
-      - The user can alternatively specify the local vector and matrix
-        dimensions when more sophisticated partitioning is needed
-        (replacing the PETSC_DECIDE argument in the VecSetSizes() statement
-        below).
-  */
-  ierr = VecCreate(PETSC_COMM_WORLD,&u);CHKERRQ(ierr);
-  ierr = VecSetSizes(u,PETSC_DECIDE,m*n);CHKERRQ(ierr);
-  ierr = VecSetFromOptions(u);CHKERRQ(ierr);
-  ierr = VecDuplicate(u,&b);CHKERRQ(ierr);
-  ierr = VecDuplicate(b,&x);CHKERRQ(ierr);
-
-  /*
-     Set exact solution; then compute right-hand-side vector.
-     By default we use an exact solution of a vector with all
-     elements of 1.0;  Alternatively, using the runtime option
-     -random_sol forms a solution vector with random components.
-  */
-  ierr = PetscOptionsGetBool(NULL,"-random_exact_sol",&flg,NULL);CHKERRQ(ierr);
-  if (flg) {
-    ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rctx);CHKERRQ(ierr);
-    ierr = PetscRandomSetFromOptions(rctx);CHKERRQ(ierr);
-    ierr = VecSetRandom(u,rctx);CHKERRQ(ierr);
-    ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr);
-  } else {
-    ierr = VecSet(u,1.0);CHKERRQ(ierr);
-  }
-  ierr = MatMult(A,u,b);CHKERRQ(ierr);
-
-  /*
-     View the exact solution vector if desired
-  */
-  flg  = PETSC_FALSE;
-  ierr = PetscOptionsGetBool(NULL,"-view_exact_sol",&flg,NULL);CHKERRQ(ierr);
-  if (flg) {ierr = VecView(u,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);}
-
-  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-                Create the linear solver and set various options
-     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
-
-  /*
-     Create linear solver context
-  */
-  ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
-
-  /*
-     Set operators. Here the matrix that defines the linear system
-     also serves as the preconditioning matrix.
-  */
-  ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
-
-  /*
-     Set linear solver defaults for this problem (optional).
-     - By extracting the KSP and PC contexts from the KSP context,
-       we can then directly call any KSP and PC routines to set
-       various options.
-     - The following two statements are optional; all of these
-       parameters could alternatively be specified at runtime via
-       KSPSetFromOptions().  All of these defaults can be
-       overridden at runtime, as indicated below.
-  */
-  ierr = KSPSetTolerances(ksp,1.e-2/((m+1)*(n+1)),1.e-50,PETSC_DEFAULT,
-                          PETSC_DEFAULT);CHKERRQ(ierr);
-
-  /*
-    Set runtime options, e.g.,
-        -ksp_type <type> -pc_type <type> -ksp_monitor -ksp_rtol <rtol>
-    These options will override those specified above as long as
-    KSPSetFromOptions() is called _after_ any other customization
-    routines.
-  */
-  ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
-
-  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-                      Solve the linear system
-     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
-
-  ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
-
-  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-                      Check solution and clean up
-     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
-
-  /*
-     Check the error
-  */
-  ierr = VecAXPY(x,-1.0,u);CHKERRQ(ierr);
-  ierr = VecNorm(x,NORM_2,&norm);CHKERRQ(ierr);
-  ierr = KSPGetIterationNumber(ksp,&its);CHKERRQ(ierr);
-
-  /*
-     Print convergence information.  PetscPrintf() produces a single
-     print statement from all processes that share a communicator.
-     An alternative is PetscFPrintf(), which prints to a file.
-  */
-  ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g iterations %D\n",(double)norm,its);CHKERRQ(ierr);
-
-  /*
-     Free work space.  All PETSc objects should be destroyed when they
-     are no longer needed.
-  */
-  ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
-  ierr = VecDestroy(&u);CHKERRQ(ierr);  ierr = VecDestroy(&x);CHKERRQ(ierr);
-  ierr = VecDestroy(&b);CHKERRQ(ierr);  ierr = MatDestroy(&A);CHKERRQ(ierr);
-
-  /*
-     Always call PetscFinalize() before exiting a program.  This routine
-       - finalizes the PETSc libraries as well as MPI
-       - provides summary and diagnostic information if certain runtime
-         options are chosen (e.g., -log_summary).
-  */
-  ierr = PetscFinalize();
-  return 0;
+    PetscInitialize(&argc, &args, (char*) 0, help);
+    ierr = PetscOptionsGetInt(NULL, "-m", &m, NULL);
+    CHKERRQ(ierr);
+    ierr = PetscOptionsGetInt(NULL, "-n", &n, NULL);
+    CHKERRQ(ierr);
+    /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+           Compute the matrix and right-hand-side vector that define
+           the linear system, Ax = b.
+       - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+    /*
+       Create parallel matrix, specifying only its global dimensions.
+       When using MatCreate(), the matrix format can be specified at
+       runtime. Also, the parallel partitioning of the matrix is
+       determined by PETSc at runtime.
+
+       Performance tuning note:  For problems of substantial size,
+       preallocation of matrix memory is crucial for attaining good
+       performance. See the matrix chapter of the users manual for details.
+     */
+    ierr = MatCreate(PETSC_COMM_WORLD, &A);
+    CHKERRQ(ierr);
+    ierr = MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, m*n, m * n);
+    CHKERRQ(ierr);
+    ierr = MatSetFromOptions(A);
+    CHKERRQ(ierr);
+    ierr = MatMPIAIJSetPreallocation(A, 5, NULL, 5, NULL);
+    CHKERRQ(ierr);
+    ierr = MatSeqAIJSetPreallocation(A, 5, NULL);
+    CHKERRQ(ierr);
+
+    /*
+       Currently, all PETSc parallel matrix formats are partitioned by
+       contiguous chunks of rows across the processors.  Determine which
+       rows of the matrix are locally owned.
+     */
+    ierr = MatGetOwnershipRange(A, &Istart, &Iend);
+    CHKERRQ(ierr);
+
+    /*
+       Set matrix elements for the 2-D, five-point stencil in parallel.
+        - Each processor needs to insert only elements that it owns
+          locally (but any non-local elements will be sent to the
+          appropriate processor during matrix assembly).
+        - Always specify global rows and columns of matrix entries.
+
+       Note: this uses the less common natural ordering that orders first
+       all the unknowns for x = h then for x = 2h etc; Hence you see J = Ii +- n
+       instead of J = I +- m as you might expect. The more standard ordering
+       would first do all variables for y = h, then y = 2h etc.
+
+     */
+    ierr = PetscLogStageRegister("Assembly", &stage);
+    CHKERRQ(ierr);
+    ierr = PetscLogStagePush(stage);
+    CHKERRQ(ierr);
+    for (Ii = Istart; Ii < Iend; Ii++) {
+        v = -1.0;
+        i = Ii / n;
+        j = Ii - i*n;
+        if (i > 0) {
+            J = Ii - n;
+            ierr = MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES);
+            CHKERRQ(ierr);
+        }
+        if (i < m - 1) {
+            J = Ii + n;
+            ierr = MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES);
+            CHKERRQ(ierr);
+        }
+        if (j > 0) {
+            J = Ii - 1;
+            ierr = MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES);
+            CHKERRQ(ierr);
+        }
+        if (j < n - 1) {
+            J = Ii + 1;
+            ierr = MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES);
+            CHKERRQ(ierr);
+        }
+        v = 4.0;
+        ierr = MatSetValues(A, 1, &Ii, 1, &Ii, &v, ADD_VALUES);
+        CHKERRQ(ierr);
+    }
+
+    /*
+       Assemble matrix, using the 2-step process:
+         MatAssemblyBegin(), MatAssemblyEnd()
+       Computations can be done while messages are in transition
+       by placing code between these two statements.
+     */
+    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
+    CHKERRQ(ierr);
+    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
+    CHKERRQ(ierr);
+    ierr = PetscLogStagePop();
+    CHKERRQ(ierr);
+
+    /* A is symmetric. Set symmetric flag to enable ICC/Cholesky preconditioner */
+    ierr = MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE);
+    CHKERRQ(ierr);
+
+    /*
+       Create parallel vectors.
+        - We form 1 vector from scratch and then duplicate as needed.
+        - When using VecCreate(), VecSetSizes and VecSetFromOptions()
+          in this example, we specify only the
+          vector's global dimension; the parallel partitioning is determined
+          at runtime.
+        - When solving a linear system, the vectors and matrices MUST
+          be partitioned accordingly.  PETSc automatically generates
+          appropriately partitioned matrices and vectors when MatCreate()
+          and VecCreate() are used with the same communicator.
+        - The user can alternatively specify the local vector and matrix
+          dimensions when more sophisticated partitioning is needed
+          (replacing the PETSC_DECIDE argument in the VecSetSizes() statement
+          below).
+     */
+    ierr = VecCreate(PETSC_COMM_WORLD, &u);
+    CHKERRQ(ierr);
+    ierr = VecSetSizes(u, PETSC_DECIDE, m * n);
+    CHKERRQ(ierr);
+    ierr = VecSetFromOptions(u);
+    CHKERRQ(ierr);
+    ierr = VecDuplicate(u, &b);
+    CHKERRQ(ierr);
+    ierr = VecDuplicate(b, &x);
+    CHKERRQ(ierr);
+
+    /*
+       Set exact solution; then compute right-hand-side vector.
+       By default we use an exact solution of a vector with all
+       elements of 1.0;  Alternatively, using the runtime option
+       -random_sol forms a solution vector with random components.
+     */
+    ierr = PetscOptionsGetBool(NULL, "-random_exact_sol", &flg, NULL);
+    CHKERRQ(ierr);
+    if (flg) {
+        ierr = PetscRandomCreate(PETSC_COMM_WORLD, &rctx);
+        CHKERRQ(ierr);
+        ierr = PetscRandomSetFromOptions(rctx);
+        CHKERRQ(ierr);
+        ierr = VecSetRandom(u, rctx);
+        CHKERRQ(ierr);
+        ierr = PetscRandomDestroy(&rctx);
+        CHKERRQ(ierr);
+    } else {
+        ierr = VecSet(u, 1.0);
+        CHKERRQ(ierr);
+    }
+    ierr = MatMult(A, u, b);
+    CHKERRQ(ierr);
+
+    /*
+       View the exact solution vector if desired
+     */
+    flg = PETSC_FALSE;
+    ierr = PetscOptionsGetBool(NULL, "-view_exact_sol", &flg, NULL);
+    CHKERRQ(ierr);
+    if (flg) {
+        ierr = VecView(u, PETSC_VIEWER_STDOUT_WORLD);
+        CHKERRQ(ierr);
+    }
+
+    /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+                  Create the linear solver and set various options
+       - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+    /*
+       Create linear solver context
+     */
+    ierr = KSPCreate(PETSC_COMM_WORLD, &ksp);
+    CHKERRQ(ierr);
+
+    /*
+       Set operators. Here the matrix that defines the linear system
+       also serves as the preconditioning matrix.
+     */
+    ierr = KSPSetOperators(ksp, A, A);
+    CHKERRQ(ierr);
+
+    /*
+       Set linear solver defaults for this problem (optional).
+       - By extracting the KSP and PC contexts from the KSP context,
+         we can then directly call any KSP and PC routines to set
+         various options.
+       - The following two statements are optional; all of these
+         parameters could alternatively be specified at runtime via
+         KSPSetFromOptions().  All of these defaults can be
+         overridden at runtime, as indicated below.
+     */
+    ierr = KSPSetTolerances(ksp, 1.e-2 / ((m + 1)*(n + 1)), 1.e-50, PETSC_DEFAULT,
+            PETSC_DEFAULT);
+    CHKERRQ(ierr);
+
+    /*
+      Set runtime options, e.g.,
+          -ksp_type <type> -pc_type <type> -ksp_monitor -ksp_rtol <rtol>
+      These options will override those specified above as long as
+      KSPSetFromOptions() is called _after_ any other customization
+      routines.
+     */
+    ierr = KSPSetFromOptions(ksp);
+    CHKERRQ(ierr);
+
+    /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+                        Solve the linear system
+       - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+    ierr = KSPSolve(ksp, b, x);
+    CHKERRQ(ierr);
+
+    /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+                        Check solution and clean up
+       - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+    /*
+       Check the error
+     */
+    ierr = VecAXPY(x, -1.0, u);
+    CHKERRQ(ierr);
+    ierr = VecNorm(x, NORM_2, &norm);
+    CHKERRQ(ierr);
+    ierr = KSPGetIterationNumber(ksp, &its);
+    CHKERRQ(ierr);
+
+    /*
+       Print convergence information.  PetscPrintf() produces a single
+       print statement from all processes that share a communicator.
+       An alternative is PetscFPrintf(), which prints to a file.
+     */
+    ierr = PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g iterations %D\n", (double) norm, its);
+    CHKERRQ(ierr);
+
+    /*
+       Free work space.  All PETSc objects should be destroyed when they
+       are no longer needed.
+     */
+    ierr = KSPDestroy(&ksp);
+    CHKERRQ(ierr);
+    ierr = VecDestroy(&u);
+    CHKERRQ(ierr);
+    ierr = VecDestroy(&x);
+    CHKERRQ(ierr);
+    ierr = VecDestroy(&b);
+    CHKERRQ(ierr);
+    ierr = MatDestroy(&A);
+    CHKERRQ(ierr);
+
+    /*
+       Always call PetscFinalize() before exiting a program.  This routine
+         - finalizes the PETSc libraries as well as MPI
+         - provides summary and diagnostic information if certain runtime
+           options are chosen (e.g., -log_summary).
+     */
+    ierr = PetscFinalize();
+    return 0;
 }
diff --git a/krylov/ksp_solver_two_sys.c b/krylov/ksp_solver_two_sys.c
index de5edd688d7b45c68fd03b52c2d954544aab7b11..1feb7bec31fbcbfe2b9d1011bd868c0ef5ecc913 100644
--- a/krylov/ksp_solver_two_sys.c
+++ b/krylov/ksp_solver_two_sys.c
@@ -7,9 +7,9 @@ All rights reserved.
 Redistribution and use in source and binary forms, with or without modification,
 are permitted provided that the following conditions are met:
 
-* Redistributions of source code must retain the above copyright notice, this
+ * Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.
-* Redistributions in binary form must reproduce the above copyright notice, this
+ * Redistributions in binary form must reproduce the above copyright notice, this
   list of conditions and the following disclaimer in the documentation and/or
   other materials provided with the distribution.
 
@@ -23,7 +23,7 @@ are permitted provided that the following conditions are met:
   ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+ */
 
 static char help[] = "Solves two linear systems in parallel with KSP.  The code\n\
 illustrates repeated solution of linear systems with the same preconditioner\n\
@@ -45,291 +45,399 @@ T*/
      petscmat.h - matrices
      petscis.h     - index sets            petscksp.h - Krylov subspace methods
      petscviewer.h - viewers               petscpc.h  - preconditioners
-*/
+ */
 #include <petscksp.h>
 
 #undef __FUNCT__
 #define __FUNCT__ "main"
-int main(int argc,char **args)
-{
-  KSP            ksp;              /* linear solver context */
-  Mat            C;                /* matrix */
-  Vec            x,u,b;            /* approx solution, RHS, exact solution */
-  PetscReal      norm;             /* norm of solution error */
-  PetscScalar    v,none = -1.0;
-  PetscInt       Ii,J,ldim,low,high,iglobal,Istart,Iend;
-  PetscErrorCode ierr;
-  PetscInt       i,j,m = 3,n = 2,its;
-  PetscMPIInt    size,rank;
-  PetscBool      mat_nonsymmetric = PETSC_FALSE;
-  PetscBool      testnewC         = PETSC_FALSE;
+
+int main(int argc, char **args) {
+    KSP ksp; /* linear solver context */
+    Mat C; /* matrix */
+    Vec x, u, b; /* approx solution, RHS, exact solution */
+    PetscReal norm; /* norm of solution error */
+    PetscScalar v, none = -1.0;
+    PetscInt Ii, J, ldim, low, high, iglobal, Istart, Iend;
+    PetscErrorCode ierr;
+    PetscInt i, j, m = 3, n = 2, its;
+    PetscMPIInt size, rank;
+    PetscBool mat_nonsymmetric = PETSC_FALSE;
+    PetscBool testnewC = PETSC_FALSE;
 #if defined(PETSC_USE_LOG)
-  PetscLogStage stages[2];
+    PetscLogStage stages[2];
 #endif
 
-  PetscInitialize(&argc,&args,(char*)0,help);
-  ierr = PetscOptionsGetInt(NULL,"-m",&m,NULL);CHKERRQ(ierr);
-  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
-  ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
-  n    = 2*size;
-
-  /*
-     Set flag if we are doing a nonsymmetric problem; the default is symmetric.
-  */
-  ierr = PetscOptionsGetBool(NULL,"-mat_nonsym",&mat_nonsymmetric,NULL);CHKERRQ(ierr);
-
-  /*
-     Register two stages for separate profiling of the two linear solves.
-     Use the runtime option -log_summary for a printout of performance
-     statistics at the program's conlusion.
-  */
-  ierr = PetscLogStageRegister("Original Solve",&stages[0]);CHKERRQ(ierr);
-  ierr = PetscLogStageRegister("Second Solve",&stages[1]);CHKERRQ(ierr);
-
-  /* -------------- Stage 0: Solve Original System ---------------------- */
-  /*
-     Indicate to PETSc profiling that we're beginning the first stage
-  */
-  ierr = PetscLogStagePush(stages[0]);CHKERRQ(ierr);
-
-  /*
-     Create parallel matrix, specifying only its global dimensions.
-     When using MatCreate(), the matrix format can be specified at
-     runtime. Also, the parallel partitioning of the matrix is
-     determined by PETSc at runtime.
-  */
-  ierr = MatCreate(PETSC_COMM_WORLD,&C);CHKERRQ(ierr);
-  ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);CHKERRQ(ierr);
-  ierr = MatSetFromOptions(C);CHKERRQ(ierr);
-  ierr = MatSetUp(C);CHKERRQ(ierr);
-
-  /*
-     Currently, all PETSc parallel matrix formats are partitioned by
-     contiguous chunks of rows across the processors.  Determine which
-     rows of the matrix are locally owned.
-  */
-  ierr = MatGetOwnershipRange(C,&Istart,&Iend);CHKERRQ(ierr);
-
-  /*
-     Set matrix entries matrix in parallel.
-      - Each processor needs to insert only elements that it owns
-        locally (but any non-local elements will be sent to the
-        appropriate processor during matrix assembly).
-      - Always specify global row and columns of matrix entries.
-  */
-  for (Ii=Istart; Ii<Iend; Ii++) {
-    v = -1.0; i = Ii/n; j = Ii - i*n;
-    if (i>0)   {J = Ii - n; ierr = MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);}
-    if (i<m-1) {J = Ii + n; ierr = MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);}
-    if (j>0)   {J = Ii - 1; ierr = MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);}
-    if (j<n-1) {J = Ii + 1; ierr = MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);}
-    v = 4.0; ierr = MatSetValues(C,1,&Ii,1,&Ii,&v,ADD_VALUES);
-  }
-
-  /*
-     Make the matrix nonsymmetric if desired
-  */
-  if (mat_nonsymmetric) {
-    for (Ii=Istart; Ii<Iend; Ii++) {
-      v = -1.5; i = Ii/n;
-      if (i>1)   {J = Ii-n-1; ierr = MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);}
+    PetscInitialize(&argc, &args, (char*) 0, help);
+    ierr = PetscOptionsGetInt(NULL, "-m", &m, NULL);
+    CHKERRQ(ierr);
+    ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
+    CHKERRQ(ierr);
+    ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);
+    CHKERRQ(ierr);
+    n = 2 * size;
+
+    /*
+       Set flag if we are doing a nonsymmetric problem; the default is symmetric.
+     */
+    ierr = PetscOptionsGetBool(NULL, "-mat_nonsym", &mat_nonsymmetric, NULL);
+    CHKERRQ(ierr);
+
+    /*
+       Register two stages for separate profiling of the two linear solves.
+       Use the runtime option -log_summary for a printout of performance
+       statistics at the program's conlusion.
+     */
+    ierr = PetscLogStageRegister("Original Solve", &stages[0]);
+    CHKERRQ(ierr);
+    ierr = PetscLogStageRegister("Second Solve", &stages[1]);
+    CHKERRQ(ierr);
+
+    /* -------------- Stage 0: Solve Original System ---------------------- */
+    /*
+       Indicate to PETSc profiling that we're beginning the first stage
+     */
+    ierr = PetscLogStagePush(stages[0]);
+    CHKERRQ(ierr);
+
+    /*
+       Create parallel matrix, specifying only its global dimensions.
+       When using MatCreate(), the matrix format can be specified at
+       runtime. Also, the parallel partitioning of the matrix is
+       determined by PETSc at runtime.
+     */
+    ierr = MatCreate(PETSC_COMM_WORLD, &C);
+    CHKERRQ(ierr);
+    ierr = MatSetSizes(C, PETSC_DECIDE, PETSC_DECIDE, m*n, m * n);
+    CHKERRQ(ierr);
+    ierr = MatSetFromOptions(C);
+    CHKERRQ(ierr);
+    ierr = MatSetUp(C);
+    CHKERRQ(ierr);
+
+    /*
+       Currently, all PETSc parallel matrix formats are partitioned by
+       contiguous chunks of rows across the processors.  Determine which
+       rows of the matrix are locally owned.
+     */
+    ierr = MatGetOwnershipRange(C, &Istart, &Iend);
+    CHKERRQ(ierr);
+
+    /*
+       Set matrix entries matrix in parallel.
+        - Each processor needs to insert only elements that it owns
+          locally (but any non-local elements will be sent to the
+          appropriate processor during matrix assembly).
+        - Always specify global row and columns of matrix entries.
+     */
+    for (Ii = Istart; Ii < Iend; Ii++) {
+        v = -1.0;
+        i = Ii / n;
+        j = Ii - i*n;
+        if (i > 0) {
+            J = Ii - n;
+            ierr = MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES);
+            CHKERRQ(ierr);
+        }
+        if (i < m - 1) {
+            J = Ii + n;
+            ierr = MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES);
+            CHKERRQ(ierr);
+        }
+        if (j > 0) {
+            J = Ii - 1;
+            ierr = MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES);
+            CHKERRQ(ierr);
+        }
+        if (j < n - 1) {
+            J = Ii + 1;
+            ierr = MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES);
+            CHKERRQ(ierr);
+        }
+        v = 4.0;
+        ierr = MatSetValues(C, 1, &Ii, 1, &Ii, &v, ADD_VALUES);
     }
-  } else {
-    ierr = MatSetOption(C,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
-    ierr = MatSetOption(C,MAT_SYMMETRY_ETERNAL,PETSC_TRUE);CHKERRQ(ierr);
-  }
-
-  /*
-     Assemble matrix, using the 2-step process:
-       MatAssemblyBegin(), MatAssemblyEnd()
-     Computations can be done while messages are in transition
-     by placing code between these two statements.
-  */
-  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
-  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
-
-  /*
-     Create parallel vectors.
-      - When using VecSetSizes(), we specify only the vector's global
-        dimension; the parallel partitioning is determined at runtime.
-      - Note: We form 1 vector from scratch and then duplicate as needed.
-  */
-  ierr = VecCreate(PETSC_COMM_WORLD,&u);CHKERRQ(ierr);
-  ierr = VecSetSizes(u,PETSC_DECIDE,m*n);CHKERRQ(ierr);
-  ierr = VecSetFromOptions(u);CHKERRQ(ierr);
-  ierr = VecDuplicate(u,&b);CHKERRQ(ierr);
-  ierr = VecDuplicate(b,&x);CHKERRQ(ierr);
-
-  /*
-     Currently, all parallel PETSc vectors are partitioned by
-     contiguous chunks across the processors.  Determine which
-     range of entries are locally owned.
-  */
-  ierr = VecGetOwnershipRange(x,&low,&high);CHKERRQ(ierr);
-
-  /*
-    Set elements within the exact solution vector in parallel.
-     - Each processor needs to insert only elements that it owns
-       locally (but any non-local entries will be sent to the
-       appropriate processor during vector assembly).
-     - Always specify global locations of vector entries.
-  */
-  ierr = VecGetLocalSize(x,&ldim);CHKERRQ(ierr);
-  for (i=0; i<ldim; i++) {
-    iglobal = i + low;
-    v       = (PetscScalar)(i + 100*rank);
-    ierr    = VecSetValues(u,1,&iglobal,&v,INSERT_VALUES);CHKERRQ(ierr);
-  }
-
-  /*
-     Assemble vector, using the 2-step process:
-       VecAssemblyBegin(), VecAssemblyEnd()
-     Computations can be done while messages are in transition,
-     by placing code between these two statements.
-  */
-  ierr = VecAssemblyBegin(u);CHKERRQ(ierr);
-  ierr = VecAssemblyEnd(u);CHKERRQ(ierr);
-
-  /*
-     Compute right-hand-side vector
-  */
-  ierr = MatMult(C,u,b);CHKERRQ(ierr);
-
-  /*
-    Create linear solver context
-  */
-  ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
-
-  /*
-     Set operators. Here the matrix that defines the linear system
-     also serves as the preconditioning matrix.
-  */
-  ierr = KSPSetOperators(ksp,C,C);CHKERRQ(ierr);
-
-  /*
-     Set runtime options (e.g., -ksp_type <type> -pc_type <type>)
-  */
-  ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
-
-  /*
-     Solve linear system.  Here we explicitly call KSPSetUp() for more
-     detailed performance monitoring of certain preconditioners, such
-     as ICC and ILU.  This call is optional, as KSPSetUp() will
-     automatically be called within KSPSolve() if it hasn't been
-     called already.
-  */
-  ierr = KSPSetUp(ksp);CHKERRQ(ierr);
-  ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
-
-  /*
-     Check the error
-  */
-  ierr = VecAXPY(x,none,u);CHKERRQ(ierr);
-  ierr = VecNorm(x,NORM_2,&norm);CHKERRQ(ierr);
-  ierr = KSPGetIterationNumber(ksp,&its);CHKERRQ(ierr);
-  if (norm > 1.e-13) {
-    ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g, Iterations %D\n",(double)norm,its);CHKERRQ(ierr);
-  }
-
-  /* -------------- Stage 1: Solve Second System ---------------------- */
-  /*
-     Solve another linear system with the same method.  We reuse the KSP
-     context, matrix and vector data structures, and hence save the
-     overhead of creating new ones.
-
-     Indicate to PETSc profiling that we're concluding the first
-     stage with PetscLogStagePop(), and beginning the second stage with
-     PetscLogStagePush().
-  */
-  ierr = PetscLogStagePop();CHKERRQ(ierr);
-  ierr = PetscLogStagePush(stages[1]);CHKERRQ(ierr);
-
-  /*
-     Initialize all matrix entries to zero.  MatZeroEntries() retains the
-     nonzero structure of the matrix for sparse formats.
-  */
-  ierr = MatZeroEntries(C);CHKERRQ(ierr);
-
-  /*
-     Assemble matrix again.  Note that we retain the same matrix data
-     structure and the same nonzero pattern; we just change the values
-     of the matrix entries.
-  */
-  for (i=0; i<m; i++) {
-    for (j=2*rank; j<2*rank+2; j++) {
-      v = -1.0;  Ii = j + n*i;
-      if (i>0)   {J = Ii - n; ierr = MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);}
-      if (i<m-1) {J = Ii + n; ierr = MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);}
-      if (j>0)   {J = Ii - 1; ierr = MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);}
-      if (j<n-1) {J = Ii + 1; ierr = MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);}
-      v = 6.0; ierr = MatSetValues(C,1,&Ii,1,&Ii,&v,ADD_VALUES);CHKERRQ(ierr);
+
+    /*
+       Make the matrix nonsymmetric if desired
+     */
+    if (mat_nonsymmetric) {
+        for (Ii = Istart; Ii < Iend; Ii++) {
+            v = -1.5;
+            i = Ii / n;
+            if (i > 1) {
+                J = Ii - n - 1;
+                ierr = MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES);
+                CHKERRQ(ierr);
+            }
+        }
+    } else {
+        ierr = MatSetOption(C, MAT_SYMMETRIC, PETSC_TRUE);
+        CHKERRQ(ierr);
+        ierr = MatSetOption(C, MAT_SYMMETRY_ETERNAL, PETSC_TRUE);
+        CHKERRQ(ierr);
     }
-  }
-  if (mat_nonsymmetric) {
-    for (Ii=Istart; Ii<Iend; Ii++) {
-      v = -1.5; i = Ii/n;
-      if (i>1)   {J = Ii-n-1; ierr = MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);}
+
+    /*
+       Assemble matrix, using the 2-step process:
+         MatAssemblyBegin(), MatAssemblyEnd()
+       Computations can be done while messages are in transition
+       by placing code between these two statements.
+     */
+    ierr = MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY);
+    CHKERRQ(ierr);
+    ierr = MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY);
+    CHKERRQ(ierr);
+
+    /*
+       Create parallel vectors.
+        - When using VecSetSizes(), we specify only the vector's global
+          dimension; the parallel partitioning is determined at runtime.
+        - Note: We form 1 vector from scratch and then duplicate as needed.
+     */
+    ierr = VecCreate(PETSC_COMM_WORLD, &u);
+    CHKERRQ(ierr);
+    ierr = VecSetSizes(u, PETSC_DECIDE, m * n);
+    CHKERRQ(ierr);
+    ierr = VecSetFromOptions(u);
+    CHKERRQ(ierr);
+    ierr = VecDuplicate(u, &b);
+    CHKERRQ(ierr);
+    ierr = VecDuplicate(b, &x);
+    CHKERRQ(ierr);
+
+    /*
+       Currently, all parallel PETSc vectors are partitioned by
+       contiguous chunks across the processors.  Determine which
+       range of entries are locally owned.
+     */
+    ierr = VecGetOwnershipRange(x, &low, &high);
+    CHKERRQ(ierr);
+
+    /*
+      Set elements within the exact solution vector in parallel.
+       - Each processor needs to insert only elements that it owns
+         locally (but any non-local entries will be sent to the
+         appropriate processor during vector assembly).
+       - Always specify global locations of vector entries.
+     */
+    ierr = VecGetLocalSize(x, &ldim);
+    CHKERRQ(ierr);
+    for (i = 0; i < ldim; i++) {
+        iglobal = i + low;
+        v = (PetscScalar) (i + 100 * rank);
+        ierr = VecSetValues(u, 1, &iglobal, &v, INSERT_VALUES);
+        CHKERRQ(ierr);
     }
-  }
-  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
-  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
 
-  ierr = PetscOptionsGetBool(NULL,"-test_newMat",&testnewC,NULL);CHKERRQ(ierr);
-  if (testnewC) {
     /*
-     User may use a new matrix C with same nonzero pattern, e.g.
-      ./ex5 -ksp_monitor -mat_type sbaij -pc_type cholesky -pc_factor_mat_solver_package mumps -test_newMat
-    */
-    Mat Ctmp;
-    ierr = MatDuplicate(C,MAT_COPY_VALUES,&Ctmp);CHKERRQ(ierr);
-    ierr = MatDestroy(&C);CHKERRQ(ierr);
-    ierr = MatDuplicate(Ctmp,MAT_COPY_VALUES,&C);CHKERRQ(ierr);
-    ierr = MatDestroy(&Ctmp);CHKERRQ(ierr);
-  }
-  /*
-     Compute another right-hand-side vector
-  */
-  ierr = MatMult(C,u,b);CHKERRQ(ierr);
-
-  /*
-     Set operators. Here the matrix that defines the linear system
-     also serves as the preconditioning matrix.
-  */
-  ierr = KSPSetOperators(ksp,C,C);CHKERRQ(ierr);
-
-  /*
-     Solve linear system
-  */
-  ierr = KSPSetUp(ksp);CHKERRQ(ierr);
-  ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
-
-  /*
-     Check the error
-  */
-  ierr = VecAXPY(x,none,u);CHKERRQ(ierr);
-  ierr = VecNorm(x,NORM_2,&norm);CHKERRQ(ierr);
-  ierr = KSPGetIterationNumber(ksp,&its);CHKERRQ(ierr);
-  if (norm > 1.e-4) {
-    ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g, Iterations %D\n",(double)norm,its);CHKERRQ(ierr);
-  }
-
-  /*
-     Free work space.  All PETSc objects should be destroyed when they
-     are no longer needed.
-  */
-  ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
-  ierr = VecDestroy(&u);CHKERRQ(ierr);
-  ierr = VecDestroy(&x);CHKERRQ(ierr);
-  ierr = VecDestroy(&b);CHKERRQ(ierr);
-  ierr = MatDestroy(&C);CHKERRQ(ierr);
-
-  /*
-     Indicate to PETSc profiling that we're concluding the second stage
-  */
-  ierr = PetscLogStagePop();CHKERRQ(ierr);
-
-  ierr = PetscFinalize();
-  return 0;
+       Assemble vector, using the 2-step process:
+         VecAssemblyBegin(), VecAssemblyEnd()
+       Computations can be done while messages are in transition,
+       by placing code between these two statements.
+     */
+    ierr = VecAssemblyBegin(u);
+    CHKERRQ(ierr);
+    ierr = VecAssemblyEnd(u);
+    CHKERRQ(ierr);
+
+    /*
+       Compute right-hand-side vector
+     */
+    ierr = MatMult(C, u, b);
+    CHKERRQ(ierr);
+
+    /*
+      Create linear solver context
+     */
+    ierr = KSPCreate(PETSC_COMM_WORLD, &ksp);
+    CHKERRQ(ierr);
+
+    /*
+       Set operators. Here the matrix that defines the linear system
+       also serves as the preconditioning matrix.
+     */
+    ierr = KSPSetOperators(ksp, C, C);
+    CHKERRQ(ierr);
+
+    /*
+       Set runtime options (e.g., -ksp_type <type> -pc_type <type>)
+     */
+    ierr = KSPSetFromOptions(ksp);
+    CHKERRQ(ierr);
+
+    /*
+       Solve linear system.  Here we explicitly call KSPSetUp() for more
+       detailed performance monitoring of certain preconditioners, such
+       as ICC and ILU.  This call is optional, as KSPSetUp() will
+       automatically be called within KSPSolve() if it hasn't been
+       called already.
+     */
+    ierr = KSPSetUp(ksp);
+    CHKERRQ(ierr);
+    ierr = KSPSolve(ksp, b, x);
+    CHKERRQ(ierr);
+
+    /*
+       Check the error
+     */
+    ierr = VecAXPY(x, none, u);
+    CHKERRQ(ierr);
+    ierr = VecNorm(x, NORM_2, &norm);
+    CHKERRQ(ierr);
+    ierr = KSPGetIterationNumber(ksp, &its);
+    CHKERRQ(ierr);
+    if (norm > 1.e-13) {
+        ierr = PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g, Iterations %D\n", (double) norm, its);
+        CHKERRQ(ierr);
+    }
+
+    /* -------------- Stage 1: Solve Second System ---------------------- */
+    /*
+       Solve another linear system with the same method.  We reuse the KSP
+       context, matrix and vector data structures, and hence save the
+       overhead of creating new ones.
+
+       Indicate to PETSc profiling that we're concluding the first
+       stage with PetscLogStagePop(), and beginning the second stage with
+       PetscLogStagePush().
+     */
+    ierr = PetscLogStagePop();
+    CHKERRQ(ierr);
+    ierr = PetscLogStagePush(stages[1]);
+    CHKERRQ(ierr);
+
+    /*
+       Initialize all matrix entries to zero.  MatZeroEntries() retains the
+       nonzero structure of the matrix for sparse formats.
+     */
+    ierr = MatZeroEntries(C);
+    CHKERRQ(ierr);
+
+    /*
+       Assemble matrix again.  Note that we retain the same matrix data
+       structure and the same nonzero pattern; we just change the values
+       of the matrix entries.
+     */
+    for (i = 0; i < m; i++) {
+        for (j = 2 * rank; j < 2 * rank + 2; j++) {
+            v = -1.0;
+            Ii = j + n*i;
+            if (i > 0) {
+                J = Ii - n;
+                ierr = MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES);
+                CHKERRQ(ierr);
+            }
+            if (i < m - 1) {
+                J = Ii + n;
+                ierr = MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES);
+                CHKERRQ(ierr);
+            }
+            if (j > 0) {
+                J = Ii - 1;
+                ierr = MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES);
+                CHKERRQ(ierr);
+            }
+            if (j < n - 1) {
+                J = Ii + 1;
+                ierr = MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES);
+                CHKERRQ(ierr);
+            }
+            v = 6.0;
+            ierr = MatSetValues(C, 1, &Ii, 1, &Ii, &v, ADD_VALUES);
+            CHKERRQ(ierr);
+        }
+    }
+    if (mat_nonsymmetric) {
+        for (Ii = Istart; Ii < Iend; Ii++) {
+            v = -1.5;
+            i = Ii / n;
+            if (i > 1) {
+                J = Ii - n - 1;
+                ierr = MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES);
+                CHKERRQ(ierr);
+            }
+        }
+    }
+    ierr = MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY);
+    CHKERRQ(ierr);
+    ierr = MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY);
+    CHKERRQ(ierr);
+
+    ierr = PetscOptionsGetBool(NULL, "-test_newMat", &testnewC, NULL);
+    CHKERRQ(ierr);
+    if (testnewC) {
+        /*
+         User may use a new matrix C with same nonzero pattern, e.g.
+          ./ex5 -ksp_monitor -mat_type sbaij -pc_type cholesky -pc_factor_mat_solver_package mumps -test_newMat
+         */
+        Mat Ctmp;
+        ierr = MatDuplicate(C, MAT_COPY_VALUES, &Ctmp);
+        CHKERRQ(ierr);
+        ierr = MatDestroy(&C);
+        CHKERRQ(ierr);
+        ierr = MatDuplicate(Ctmp, MAT_COPY_VALUES, &C);
+        CHKERRQ(ierr);
+        ierr = MatDestroy(&Ctmp);
+        CHKERRQ(ierr);
+    }
+    /*
+       Compute another right-hand-side vector
+     */
+    ierr = MatMult(C, u, b);
+    CHKERRQ(ierr);
+
+    /*
+       Set operators. Here the matrix that defines the linear system
+       also serves as the preconditioning matrix.
+     */
+    ierr = KSPSetOperators(ksp, C, C);
+    CHKERRQ(ierr);
+
+    /*
+       Solve linear system
+     */
+    ierr = KSPSetUp(ksp);
+    CHKERRQ(ierr);
+    ierr = KSPSolve(ksp, b, x);
+    CHKERRQ(ierr);
+
+    /*
+       Check the error
+     */
+    ierr = VecAXPY(x, none, u);
+    CHKERRQ(ierr);
+    ierr = VecNorm(x, NORM_2, &norm);
+    CHKERRQ(ierr);
+    ierr = KSPGetIterationNumber(ksp, &its);
+    CHKERRQ(ierr);
+    if (norm > 1.e-4) {
+        ierr = PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g, Iterations %D\n", (double) norm, its);
+        CHKERRQ(ierr);
+    }
+
+    /*
+       Free work space.  All PETSc objects should be destroyed when they
+       are no longer needed.
+     */
+    ierr = KSPDestroy(&ksp);
+    CHKERRQ(ierr);
+    ierr = VecDestroy(&u);
+    CHKERRQ(ierr);
+    ierr = VecDestroy(&x);
+    CHKERRQ(ierr);
+    ierr = VecDestroy(&b);
+    CHKERRQ(ierr);
+    ierr = MatDestroy(&C);
+    CHKERRQ(ierr);
+
+    /*
+       Indicate to PETSc profiling that we're concluding the second stage
+     */
+    ierr = PetscLogStagePop();
+    CHKERRQ(ierr);
+
+    ierr = PetscFinalize();
+    return 0;
 }
 
 
diff --git a/spgemm/mkl_shmem/mklspgemm.c b/spgemm/mkl_shmem/mklspgemm.c
index 990d4d322de9d6e478042ef10d9afafa3184e0a0..08e34e04b72c5c34af05876c05907217b1cb1208 100644
--- a/spgemm/mkl_shmem/mklspgemm.c
+++ b/spgemm/mkl_shmem/mklspgemm.c
@@ -3,7 +3,7 @@
 #include <assert.h>
 #include <math.h>
 #include <mkl.h>
-#define MIC_TARGET 
+#define MIC_TARGET
 #include "../../common/mmio.h"
 #include "../../common/util.h"
 #include "../../common/csr.h"
@@ -15,7 +15,7 @@ char transa = 'n';
 /*}}}*/
 
 /** Multiply two sparse matrices which are stored in CSR format. MKL is used */
-void mkl_cpu_spgemm(MKL_INT Am, MKL_INT An, MKL_INT Annz, double* Aval, MKL_INT* AJ, MKL_INT* AI, MKL_INT Bn, MKL_INT Bnnz, double* Bval, MKL_INT* BJ, MKL_INT* BI, double** pCval, MKL_INT** pCJ, MKL_INT** pCI, double* time) {  /*{{{*/	
+void mkl_cpu_spgemm(MKL_INT Am, MKL_INT An, MKL_INT Annz, double* Aval, MKL_INT* AJ, MKL_INT* AI, MKL_INT Bn, MKL_INT Bnnz, double* Bval, MKL_INT* BJ, MKL_INT* BI, double** pCval, MKL_INT** pCJ, MKL_INT** pCI, double* time) {  /*{{{*/
     MKL_INT* CJ = NULL;
     double* Cval = NULL;
     MKL_INT sort = 3;    // sort everything
@@ -24,7 +24,7 @@ void mkl_cpu_spgemm(MKL_INT Am, MKL_INT An, MKL_INT Annz, double* Aval, MKL_INT*
     MKL_INT ierr;
     MKL_INT request = 1;    // symbolic multiplication
     mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &Bn, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr);
-    
+
     request = 2;        //numeric multiplication
     int Cnnz = CI[Am]-1;
     int Cval_size = Cnnz + 1;
@@ -38,7 +38,7 @@ void mkl_cpu_spgemm(MKL_INT Am, MKL_INT An, MKL_INT Annz, double* Aval, MKL_INT*
     double time_end = dsecnd();
     *time = (time_end - time_st)/nrepeat;
     *pCval = Cval; *pCJ = CJ; *pCI = CI;
-} /*}}}*/ 
+} /*}}}*/
 
 int main(int argc, char* argv[]) { /*{{{*/
     /** usage */
@@ -68,51 +68,51 @@ int main(int argc, char* argv[]) { /*{{{*/
     }
     option_print_matrices = strcmp(argv[iarg], "PRINT_YES")==0?OPTION_PRINT_MATRICES:OPTION_NOPRINT_MATRICES;    iarg++;
     assert(nrequired_args == iarg);
-    
+
     /** read matrix market file for A matrix */
     MKL_INT Am, An, Annz;
-    MKL_INT *Ax, *Ay;  
+    MKL_INT *Ax, *Ay;
     double *Anz;
     read_mm(strpathA, &Am, &An, &Annz, &Ax, &Ay, &Anz);
-    
+
     /** construct csr storage for A matrix */
     MKL_INT* AJ = (MKL_INT*) mkl_malloc( Annz * sizeof( MKL_INT ), 64 );
     MKL_INT* AI = (MKL_INT*) mkl_malloc( (Am+1) * sizeof( MKL_INT ), 64 );
     double* Aval = (double*) mkl_malloc( Annz * sizeof( double ),  64 );
-    coo_to_csr(Am, Annz, Ax, Ay, Anz, AI, AJ, Aval);    
-    
+    coo_to_csr(Am, Annz, Ax, Ay, Anz, AI, AJ, Aval);
+
     /** read matrix market file for B matrix */
-    MKL_INT Bm, Bn, Bnnz;   
-    MKL_INT *Bx, *By;  
+    MKL_INT Bm, Bn, Bnnz;
+    MKL_INT *Bx, *By;
     double *Bnz;
     read_mm(strpathB, &Bm, &Bn, &Bnnz, &Bx, &By, &Bnz);
-    
+
     /** construct csr storage for B matrix */
     MKL_INT* BJ = (MKL_INT*) mkl_malloc( Bnnz * sizeof( MKL_INT ), 64 );
     MKL_INT* BI = (MKL_INT*) mkl_malloc( (Bm+1) * sizeof( MKL_INT ), 64 );
     double* Bval = (double*) mkl_malloc( Bnnz * sizeof( double ),  64 );
-    coo_to_csr(Bm, Bnnz, Bx, By, Bnz, BI, BJ, Bval);    
-    
+    coo_to_csr(Bm, Bnnz, Bx, By, Bnz, BI, BJ, Bval);
+
     /** multiply two matrices */
-    double* Cval, time; 
+    double* Cval, time;
     MKL_INT* CJ; MKL_INT* CI;
     mkl_cpu_spgemm(Am, An, Annz, Aval, AJ, AI, Bn, Bnnz, Bval, BJ, BI, &Cval, &CJ, &CI, &time);
-    
+
     int i;
     for(i=0;i<=Am;i++)AI[i]--;    for(i=0;i<Annz;i++)AJ[i]--;
     for(i=0;i<=Bm;i++)BI[i]--;    for(i=0;i<Bnnz;i++)BJ[i]--;
     printmm_one(Am, Cval, CJ, CI);
-    
+
     /** In order to write the output C matrix to file, uncomment the following line */
     printfilemm_one(strpathC, Am, Bn, Cval, CJ, CI);
-    
+
     /** run my SpGEMM routine in order to find number of multiply-and-add operations */
     long nummult = 0;
     csi* xb = (csi*)calloc(Bn, sizeof(csi));
     csv* x = (csv*)calloc(Bn, sizeof(csv));
     csr* C = csr_multiply(Am, An, Annz, AI, AJ, Aval, Bm, Bn, Bnnz, BI, BJ, Bval, &nummult, xb, x);
     double gflop = 2 * (double) nummult / 1e9;
-    
+
     /** print gflop per second and time */
     printf("%d\t", nthreads);
     printf("%g\t", (gflop/time));
@@ -121,7 +121,7 @@ int main(int argc, char* argv[]) { /*{{{*/
     printf("%s\t", strpathB);
     printf("%s\t", strpathC);
     printf("\n");
-    
+
     /** free allocated space */
     mkl_free(AI);
     mkl_free(AJ);
diff --git a/spgemm/mkl_xphi/mklspgemm.c b/spgemm/mkl_xphi/mklspgemm.c
index 14b15cacad999fbd87aec20743203605a2a4f293..12661eed01dcc90b6f7cccf430759196e903c3ed 100644
--- a/spgemm/mkl_xphi/mklspgemm.c
+++ b/spgemm/mkl_xphi/mklspgemm.c
@@ -3,7 +3,7 @@
 #include <assert.h>
 #include <math.h>
 #include <mkl.h>
-#define MIC_TARGET __declspec(target(mic)) 
+#define MIC_TARGET __declspec(target(mic))
 #include "../../common/mmio.h"
 #include "../../common/util.h"
 #include "../../common/csr.h"
@@ -26,7 +26,7 @@ void mkl_mic_spgemm(MKL_INT Am, MKL_INT An, MKL_INT Annz, double* Aval, MKL_INT*
         } else {
             nthreads = mkl_get_max_threads();
         }
-    } 
+    }
     #pragma offload target(mic:micdev) \
         in(transa) \
         in(Am) \
@@ -37,7 +37,7 @@ void mkl_mic_spgemm(MKL_INT Am, MKL_INT An, MKL_INT Annz, double* Aval, MKL_INT*
         in(AJ:length(Annz)    free_if(0)) \
         in(Bval:length(Bnnz)    free_if(0)) \
         in(BI:length(An+1)    free_if(0)) \
-        in(BJ:length(Bnnz)    free_if(0)) 
+        in(BJ:length(Bnnz)    free_if(0))
     {}
 
     MKL_INT    Cnnz_host = -1;
@@ -68,11 +68,11 @@ void mkl_mic_spgemm(MKL_INT Am, MKL_INT An, MKL_INT Annz, double* Aval, MKL_INT*
 
         for(i = 0; i < 10; i++) {
             mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &Bn, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr);
-        }    
+        }
         double s_initial = dsecnd();
         for(i = 0; i < nrepeat; i++) {
             mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &Bn, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr);
-        }    
+        }
         time_mic_symbolic_mm = (dsecnd() - s_initial) / nrepeat; // seconds
 
         request = 2;        //numeric multiplication
@@ -109,13 +109,13 @@ void mkl_mic_spgemm(MKL_INT Am, MKL_INT An, MKL_INT Annz, double* Aval, MKL_INT*
     }
     *time = time_mic_numeric_mm;
     if (1) { // Copy result matrix from MIC to Host
-    int nelm_CI = Am + 2; 
-    int nelm_CJ = Cnnz_host + 1; 
-    int nelm_Cval = Cnnz_host + 1; 
+    int nelm_CI = Am + 2;
+    int nelm_CJ = Cnnz_host + 1;
+    int nelm_Cval = Cnnz_host + 1;
     __declspec(target(mic)) MKL_INT* CI_host = (MKL_INT*)mkl_malloc( (nelm_CI) * sizeof( MKL_INT ), 64 );
     __declspec(target(mic)) MKL_INT* CJ_host = (MKL_INT*)mkl_malloc( (nelm_CJ) * sizeof( MKL_INT ), 64 );
     __declspec(target(mic)) double* Cval_host = (double*)mkl_malloc( (nelm_Cval) * sizeof( double ), 64 );
-    #pragma offload target(mic:micdev) in(nelm_CI)\ 
+    #pragma offload target(mic:micdev) in(nelm_CI)\
         inout(CI_host:length(nelm_CI)    alloc_if(1)    free_if(0)) \
         inout(CJ_host:length(nelm_CJ)    alloc_if(1)    free_if(0)) \
         inout(Cval_host:length(nelm_Cval)    alloc_if(1)    free_if(0)) \
@@ -127,7 +127,7 @@ void mkl_mic_spgemm(MKL_INT Am, MKL_INT An, MKL_INT Annz, double* Aval, MKL_INT*
         for(i = 0; i < nelm_CI; i++) CI_host[i] = CI[i];
         for(i = 0; i < nelm_CJ; i++) {CJ_host[i] = CJ[i]; Cval_host[i] = Cval[i];}
     }
-    *pCval = Cval_host; *pCJ = CJ_host; *pCI = CI_host; 
+    *pCval = Cval_host; *pCJ = CJ_host; *pCI = CI_host;
     }
 } /* ENDOF mkl_mic_spmm }}}*/
 
@@ -162,51 +162,51 @@ int main(int argc, char* argv[]) { /*{{{*/
     micdev = atoi(argv[iarg]);    iarg++;
     option_print_matrices = strcmp(argv[iarg], "PRINT_YES")==0?OPTION_PRINT_MATRICES:OPTION_NOPRINT_MATRICES;    iarg++;
     assert(nrequired_args == iarg);
-    
+
     /** read matrix market file for A matrix */
     MKL_INT Am, An, Annz;
-    MKL_INT *Ax, *Ay;  
+    MKL_INT *Ax, *Ay;
     double *Anz;
     read_mm(strpathA, &Am, &An, &Annz, &Ax, &Ay, &Anz);
-    
+
     /** construct csr storage for A matrix */
     MKL_INT* AJ = (MKL_INT*) mkl_malloc( Annz * sizeof( MKL_INT ), 64 );
     MKL_INT* AI = (MKL_INT*) mkl_malloc( (Am+1) * sizeof( MKL_INT ), 64 );
     double* Aval = (double*) mkl_malloc( Annz * sizeof( double ),  64 );
-    coo_to_csr(Am, Annz, Ax, Ay, Anz, AI, AJ, Aval);    
-    
+    coo_to_csr(Am, Annz, Ax, Ay, Anz, AI, AJ, Aval);
+
     /** read matrix market file for B matrix */
-    MKL_INT Bm, Bn, Bnnz;   
-    MKL_INT *Bx, *By;  
+    MKL_INT Bm, Bn, Bnnz;
+    MKL_INT *Bx, *By;
     double *Bnz;
     read_mm(strpathB, &Bm, &Bn, &Bnnz, &Bx, &By, &Bnz);
-    
+
     /** construct csr storage for B matrix */
     MKL_INT* BJ = (MKL_INT*) mkl_malloc( Bnnz * sizeof( MKL_INT ), 64 );
     MKL_INT* BI = (MKL_INT*) mkl_malloc( (Bm+1) * sizeof( MKL_INT ), 64 );
     double* Bval = (double*) mkl_malloc( Bnnz * sizeof( double ),  64 );
-    coo_to_csr(Bm, Bnnz, Bx, By, Bnz, BI, BJ, Bval);    
-    
+    coo_to_csr(Bm, Bnnz, Bx, By, Bnz, BI, BJ, Bval);
+
     /** multiply two matrices */
-    double* Cval, time; 
+    double* Cval, time;
     MKL_INT* CJ; MKL_INT* CI;
     mkl_mic_spgemm(Am, An, Annz, Aval, AJ, AI, Bn, Bnnz, Bval, BJ, BI, &Cval, &CJ, &CI, &time);
-    
+
     int i;
     for(i=0;i<=Am;i++)AI[i]--;    for(i=0;i<Annz;i++)AJ[i]--;
     for(i=0;i<=Bm;i++)BI[i]--;    for(i=0;i<Bnnz;i++)BJ[i]--;
     printmm_one(Am, Cval, CJ, CI);
-    
+
     /** Write the output C matrix to file */
     printfilemm_one(strpathC, Am, Bn, Cval, CJ, CI);
-    
+
     /** run my SpGEMM routine in order to find number of multiply-and-add operations */
     long nummult = 0;
     csi* xb = (csi*)calloc(Bn, sizeof(csi));
     csv* x = (csv*)calloc(Bn, sizeof(csv));
     csr* C = csr_multiply(Am, An, Annz, AI, AJ, Aval, Bm, Bn, Bnnz, BI, BJ, Bval, &nummult, xb, x);
     double gflop = 2 * (double) nummult / 1e9;
-    
+
     /** print gflop per second and time */
     printf("%d\t", nthreads);
     printf("%g\t", (gflop/time));
@@ -215,7 +215,7 @@ int main(int argc, char* argv[]) { /*{{{*/
     printf("%s\t", strpathB);
     printf("%s\t", strpathC);
     printf("\n");
-    
+
     /** free allocated space */
     mkl_free(AI);
     mkl_free(AJ);
diff --git a/spmv/mkl_shmem/mklspmv.c b/spmv/mkl_shmem/mklspmv.c
index 12b5f33599c9283cd12d8acff3a9ec77c0df0001..bdec50ffa107d77faad5139b80e4759d352a623b 100644
--- a/spmv/mkl_shmem/mklspmv.c
+++ b/spmv/mkl_shmem/mklspmv.c
@@ -3,7 +3,7 @@
 #include <assert.h>
 #include <math.h>
 #include <mkl.h>
-#define MIC_TARGET 
+#define MIC_TARGET
 #include "../../common/mmio.h"
 #include "../../common/util.h"
 /*}}}*/
@@ -20,7 +20,7 @@ void mkl_cpu_spmv(const MKL_INT Am, const MKL_INT An,  double* Aval, MKL_INT* AJ
     double alpha = 1.0;
     double beta = 0.0;
     char* matdescra = "G NF";
-    
+
     double time_st = dsecnd();
     int i;
     for(i = 0; i < nrepeat; i++) {
@@ -29,7 +29,7 @@ void mkl_cpu_spmv(const MKL_INT Am, const MKL_INT An,  double* Aval, MKL_INT* AJ
     }
     double time_end = dsecnd();
     *time = (time_end - time_st)/nrepeat;
-} /*}}}*/ 
+} /*}}}*/
 
 int main(int argc, char* argv[]) { /*{{{*/
     /** usage */
@@ -57,40 +57,40 @@ int main(int argc, char* argv[]) { /*{{{*/
     }
     option_print_matrices = strcmp(argv[iarg], "PRINT_YES")==0?OPTION_PRINT_MATRICES:OPTION_NOPRINT_MATRICES;    iarg++;
     assert(nrequired_args == iarg);
-    
+
     /** read matrix market file for A matrix */
     MKL_INT Am, An, Annz;
-    MKL_INT *Ax, *Ay;  
+    MKL_INT *Ax, *Ay;
     double *Anz;
     read_mm(strpathA, &Am, &An, &Annz, &Ax, &Ay, &Anz);
-    
+
     /** construct csr storage for A matrix */
     MKL_INT* AJ = (MKL_INT*) mkl_malloc( Annz * sizeof( MKL_INT ), 64 );
     MKL_INT* AI = (MKL_INT*) mkl_malloc( (Am+1) * sizeof( MKL_INT ), 64 );
     double* Aval = (double*) mkl_malloc( Annz * sizeof( double ),  64 );
-    coo_to_csr(Am, Annz, Ax, Ay, Anz, AI, AJ, Aval);    
-    
-    double* xvec = (double*) mkl_malloc( An * sizeof( double ), 64 ); 
+    coo_to_csr(Am, Annz, Ax, Ay, Anz, AI, AJ, Aval);
+
+    double* xvec = (double*) mkl_malloc( An * sizeof( double ), 64 );
     double* yvec = (double*) mkl_malloc( Am * sizeof( double ), 64 );
     int i;
     for(i=0;i<An;i++)xvec[i]=1.0;
     for(i=0;i<Am;i++)yvec[i]=0.0;
-    
-    double time; 
+
+    double time;
     mkl_cpu_spmv(Am, An, Aval, AJ, AI, xvec, yvec, &time);
-    
+
     printmm_one(Am, Aval, AJ, AI);
-    
+
     /** number of multiply-and-add operations in terms of giga flops*/
     double gflop = 2 * (double) Annz / 1e9;
-    
+
     /** print gflop per second and time */
     printf("%d\t", nthreads);
     printf("%g\t", (gflop/time));
     printf("%g\t", time);
     printf("%s\t", strpathA);
     printf("\n");
-    
+
     printf("Output vector:\n");
     for(i=0;i<Am;i++)printf("%g ", yvec[i]);
     printf("\n");