Algorithms_in_C 1.0.0
Set of algorithms implemented in C.
Loading...
Searching...
No Matches
lcs.c File Reference

Longest Common Subsequence algorithm More...

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
Include dependency graph for lcs.c:

Enumerations

enum  { LEFT , UP , DIAG }
 

Functions

void lcslen (const char *s1, const char *s2, int l1, int l2, int **L, int **B)
 Computes LCS between s1 and s2 using a dynamic-programming approach.
 
char * lcsbuild (const char *s1, int l1, int l2, int **L, int **B)
 Builds the LCS according to B using a traceback approach.
 
static void test ()
 Self-test implementations.
 
int main (int argc, char *argv[])
 Main function.
 

Detailed Description

Longest Common Subsequence algorithm

From Wikipedia: The longest common subsequence (LCS) problem is the problem of finding the longest subsequence common to all sequences in a set of sequences (often just two sequences).

Author
Kurtz

Enumeration Type Documentation

◆ anonymous enum

anonymous enum
18{LEFT, UP, DIAG};

Function Documentation

◆ lcsbuild()

char * lcsbuild ( const char *  s1,
int  l1,
int  l2,
int **  L,
int **  B 
)

Builds the LCS according to B using a traceback approach.

Parameters
s1first null-terminated string
l1length of s1
l2length of s2
Lmatrix of size l1 x l2
Bmatrix of size l1 x l2
Returns
lcs longest common subsequence
64 {
65 int i, j, lcsl;
66 char *lcs;
67 lcsl = L[l1][l2];
68
69 /* my lcs is at least the empty symbol */
70 lcs = (char *)calloc(lcsl+1, sizeof(char)); /* null-terminated \0 */
71 if (!lcs) {
72 perror("calloc: ");
73 return NULL;
74 }
75
76 i = l1, j = l2;
77 while (i > 0 && j > 0) {
78 /* walk the matrix backwards */
79 if (B[i][j] == DIAG) {
80 lcs[--lcsl] = s1[i-1];
81 i = i - 1;
82 j = j - 1;
83 }
84 else if (B[i][j] == LEFT)
85 {
86 j = j - 1;
87 }
88 else
89 {
90 i = i - 1;
91 }
92 }
93 return lcs;
94}
#define calloc(elemCount, elemSize)
This macro replace the standard calloc function with calloc_dbg.
Definition malloc_dbg.h:22
Definition list.h:8

◆ lcslen()

void lcslen ( const char *  s1,
const char *  s2,
int  l1,
int  l2,
int **  L,
int **  B 
)

Computes LCS between s1 and s2 using a dynamic-programming approach.

Parameters
s1first null-terminated string
s2second null-terminated string
l1length of s1
l2length of s2
Lmatrix of size l1 x l2
Bmatrix of size l1 x l2
Returns
void
30 {
31 /* B is the directions matrix
32 L is the LCS matrix */
33 int i, j;
34
35 /* loop over the simbols in my sequences
36 save the directions according to the LCS */
37 for (i = 1; i <= l1; ++i) {
38 for (j = 1; j <= l2; ++j) {
39 if (s1[i-1] == s2[j-1]) {
40 L[i][j] = 1 + L[i-1][j-1];
41 B[i][j] = DIAG;
42 }
43 else if (L[i-1][j] < L[i][j-1]) {
44 L[i][j] = L[i][j-1];
45 B[i][j] = LEFT;
46 }
47 else {
48 L[i][j] = L[i-1][j];
49 B[i][j] = UP;
50 }
51 }
52 }
53}

◆ main()

int main ( int  argc,
char *  argv[] 
)

Main function.

Parameters
argccommandline argument count (ignored)
argvcommandline array of arguments (ignored)
Returns
0 on exit
162 {
163 test(); // run self-test implementations
164 return 0;
165}
static void test()
Self-test implementations.
Definition lcs.c:100
Here is the call graph for this function:

◆ test()

static void test ( void  )
static

Self-test implementations.

Returns
void
100 {
101 /* https://en.wikipedia.org/wiki/Subsequence#Applications */
102 int **L, **B, j, l1, l2;
103
104 char *s1 = "ACGGTGTCGTGCTATGCTGATGCTGACTTATATGCTA";
105 char *s2 = "CGTTCGGCTATCGTACGTTCTATTCTATGATTTCTAA";
106 char *lcs;
107
108 l1 = strlen(s1);
109 l2 = strlen(s2);
110
111 L = (int **)calloc(l1+1, sizeof(int *));
112 B = (int **)calloc(l1+1, sizeof(int *));
113
114 if (!L) {
115 perror("calloc: ");
116 exit(1);
117 }
118 if (!B) {
119 perror("calloc: ");
120 exit(1);
121 }
122 for (j = 0; j <= l1; j++) {
123 L[j] = (int *)calloc(l2+1, sizeof(int));
124 if (!L[j]) {
125 perror("calloc: ");
126 exit(1);
127 }
128 B[j] = (int *)calloc(l2+1, sizeof(int));
129 if (!L[j]) {
130 perror("calloc: ");
131 exit(1);
132 }
133 }
134
135 lcslen(s1, s2, l1, l2, L, B);
136 lcs = lcsbuild(s1, l1, l2, L, B);
137
138 assert(L[l1][l2] == 27);
139 assert(strcmp(lcs, "CGTTCGGCTATGCTTCTACTTATTCTA") == 0);
140
141 printf("S1: %s\tS2: %s\n", s1, s2);
142 printf("LCS len:%3d\n", L[l1][l2]);
143 printf("LCS: %s\n", lcs);
144
145 free(lcs);
146 for (j = 0; j <= l1; j++)
147 {
148 free(L[j]), free(B[j]);
149 }
150 free(L);
151 free(B);
152
153 printf("All tests have successfully passed!\n");
154}
char * lcsbuild(const char *s1, int l1, int l2, int **L, int **B)
Builds the LCS according to B using a traceback approach.
Definition lcs.c:64
void lcslen(const char *s1, const char *s2, int l1, int l2, int **L, int **B)
Computes LCS between s1 and s2 using a dynamic-programming approach.
Definition lcs.c:30
#define free(ptr)
This macro replace the standard free function with free_dbg.
Definition malloc_dbg.h:26
Here is the call graph for this function: