Sunday, July 28, 2013

Given a paragraph of text, write a program to find the first shortest sub-segment that contains each of the given k words at least once. A segment is said to be shorter than other if it contains less number of words. Ignore characters other than [a-z][A-Z] in the text. Comparison between the strings should be case-insensitive. If no sub-segment is found then the program should output “NO SUBSEGMENT FOUND”.

/* CODE*/
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
#define DEBUG 0
#define START 0
#define END 1
#define LENGTH 2
#define OFFSET 3
#define MAX_STRING_LEN 15
#define MAX_WORD_LENGTH 200000
void parsed_string(char *input_word, char *output_word);
int find_min_seq(char *parsed_word,int parse_pos,char ref_word[][15],int ref_len);

int cmp_table[200000]={0};
int result_table[2] = {0,MAX_WORD_LENGTH};
char input_str[200000],parsed_str[200000]={'\0'};;
main()
{
    int parse_pos = 1,ref_len =0,result,i,j,k;
    char *orig_word,*temp_word,(*ref_word)[15],parsed_word[15],*temp_str,current_str[15];
    gets(input_str);
    scanf("%d",&k);
    temp_word = (char *)malloc(sizeof(char)*MAX_STRING_LEN*k);
    ref_word = (char (*)[MAX_STRING_LEN])temp_word;

    for(i=0;i<k;i++)
    {
        scanf("%s",current_str);
       
        for(j=0;j<ref_len;j++)
        {
            if(strcmp(current_str,ref_word[j])== 0)
                break;
        }
        if(j == ref_len)
            {
            strcpy(ref_word[ref_len++],current_str);
            }
    }
    orig_word = strtok(input_str," ");

    while(orig_word !=NULL)
    {       

        parsed_string(orig_word, parsed_word);
        strcat(parsed_str,parsed_word);       
        strcat(parsed_str," ");
        result = find_min_seq(parsed_word,parse_pos,ref_word,ref_len);
        if(result == 1)
            break;
        orig_word = strtok(NULL," ");
        parse_pos++;
    }

    if(cmp_table[LENGTH] != ref_len)
    {
        printf("NO SUBSEGMENT FOUND\n");
    }
    else
    {
        i = 1;
        temp_str = parsed_str;
        while(i <= parse_pos)
        {
            if(i >= result_table[START])
            {
                if(i < result_table[END])
                {
                   while(*temp_str != ' ')
                       printf("%c",*temp_str++);
                   printf(" ");
                }
                else
                {
                    while(*temp_str != ' ')
                        printf("%c",*temp_str++);
                    break;
                }
            }
            while(*temp_str++ != ' ');
            i++;
        }
     }
#if DEBUG
        printf("start : %d, End: %d\n", result_table[START],result_table[END]);
    getchar();getchar();
#endif
    return 0;
}

void parsed_string(char *input_word, char *output_word)
{
    int i=0,j=0;
    char ch;
    while(1)
    {
        ch = input_word[i++];
       
        if((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
        {
            output_word[j++] = ch;
        }
        else if(ch == '\0')
        {
            output_word[j]='\0';
            break;
        }
        else
        {
            continue;
        }
    }
}

int find_min_seq(char *parsed_word,int parse_pos,char ref_word[][15],int ref_len)
{
    int result = 0;
    int i,j;


    for(i=0;i<ref_len;i++)
    {
        if(strcasecmp(parsed_word,ref_word[i]) == 0)
        {
            if(cmp_table[OFFSET+i] == 0)
            {
                cmp_table[LENGTH]++;
            }
           
            if(cmp_table[OFFSET+i] == cmp_table[START])
            {               
                cmp_table[OFFSET+i] = parse_pos;
                cmp_table[START] = get_min_value(ref_len);
            }
            else
            {
                cmp_table[OFFSET+i] = parse_pos;
            }
            cmp_table[END] = parse_pos;
           
            if(cmp_table[LENGTH] == ref_len)
            {
                if( (result_table[END]- result_table[START])> (cmp_table[END] - cmp_table[START]) )
                {
                    result_table[START] = cmp_table[START];
                    result_table[END] = cmp_table[END];
                }
                if(ref_len == (result_table[END]- result_table[START]+1))
                {
                    result = 1;
                }
                else
                {
                    result = 0;
                }
            }
            break;
        }
       
    }
#if DEBUG
    for(j=0;j<ref_len+OFFSET;j++)
        {
            printf("%d ",cmp_table[j]);
        }
        printf("\n");
#endif
    return(result);
}

int get_min_value(int len)
{
int min_val = cmp_table[OFFSET],i;

    for(i=OFFSET+1;i<len+OFFSET;i++)
    {
        if((cmp_table[i] !=0) && (min_val > cmp_table[i]))
        {
            min_val = cmp_table[i];
        }
    }

return(min_val);
}

No comments:

Post a Comment