#include #include #include #include #define BUFFER_SIZE 1024 typedef struct pair{ char * string; int count; struct pair * next; } pair; int unique_count = 0; pair ** storage; int storage_size = 19997; static int max_printed = 1000000; unsigned int hash(char * s){ unsigned int value = 0; char * sptr = s; while (*sptr){ value = value*31 + *sptr; sptr++; } return value % storage_size; } int comparePair(const void * a, const void * b){ pair ** aptr = (pair **) a; pair ** bptr = (pair **) b; int diff = (*bptr)->count - (*aptr)->count; return diff == 0 ? strcmp((*aptr)->string,(*bptr)->string) : diff; } void process(char * s){ int slen = strlen(s); if (slen == 0) return; int k; for(k=0;k < slen; k++){ s[k] = tolower(s[k]); } unsigned int bucket = hash(s); pair * ptr = storage[bucket]; while (ptr){ if (strcmp(ptr->string,s) == 0){ ptr->count++; return; } ptr = ptr->next; } unique_count++; ptr = storage[bucket]; storage[bucket] = (pair *) malloc(sizeof(pair)); storage[bucket]->string = (char *) malloc(slen+1); strcpy(storage[bucket]->string,s); storage[bucket]->count = 1; storage[bucket]->next = ptr; } void output(){ pair ** array = (pair **)malloc(sizeof(pair *)*unique_count); int stored = 0; int k; for(k=0; k < storage_size; k++){ pair * ptr = storage[k]; while (ptr){ array[stored++] = ptr; ptr = ptr->next; } } qsort(array,unique_count,sizeof(pair *),comparePair); for(k=0; k < unique_count; k++){ if (k >= max_printed) break; printf("%d\t%s\n",array[k]->count,array[k]->string); } } int main(int argc, char ** argv){ char buf[BUFFER_SIZE]; char * bufptr = buf; storage = (pair **) malloc(sizeof(pair *)*storage_size); if (argc <= 1){ printf("usage: %s filename\n",argv[0]); exit(0); } FILE * file = fopen(argv[1],"r"); if (file == 0) { exit(0); } while (fscanf(file,"%s",buf) != EOF){ process(buf); } output(); }