0%

使用字典树实现对句子单词个数的统计

曾经看到有一道题,怎么在不使用C++的STL的情况下,完成句子中单词存储的个数计数。

比如输入
Hello everybody! He is Steve.

输出
He 1
Hello 1
Steve 1
everybody 1
is 1

我这里考虑使用字典树的做法,定义一个特殊的字典树节点:
struct node{
char x;
struct node * children[256];
int count;
bool isEnd;
node(){
isEnd = false;
count = 0;
memset(children, 0, sizeof(children));
}
};

每当单词结束,就把isEnd标记为true,并把count ++;
我们在所有单词处理完毕以后,我们只需要深度优先遍历树就可以输出所有单词。

完整代码如下(C++解法):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#include<iostream>
#include<stdlib.h>

using namespace std;

char keys[10000];

struct node{
char x;
struct node * children[256];
int count;
bool isEnd;
node(){
isEnd = false;
count = 0;
memset(children, 0, sizeof(children));
}
};

void buildTree( struct node * root , char * str ){
if( str == NULL || strlen(str) <= 0 ){
return;
}
struct node * current = root;
int position = 0;
while ( str[position] ) {
if( (str[position] >= 'a' && str[position] <= 'z') || ( str[position] >= 'A' && str[position] <= 'Z') ){
if( current -> children[str[position]] == NULL ){
current -> children[str[position]] = new node;
current -> children[str[position]] -> x = str[position];
current = current -> children[str[position]] ;
}else{
current = current -> children[str[position]] ;
}
}else{
if( current != root ){
current -> count ++;
current -> isEnd = true;
current = root;
}
}
position ++;
}
}

void outputTree( struct node * root , int level ){
if( root != NULL){
keys[level] = root -> x;
if(root->isEnd){
for ( int i = 1 ; i <= level; i ++ ) {
cout<< keys[i];
}
cout << " "<<root->count << endl;
}
for(int i = 0 ; i < 256 ; i ++){
if( root -> children [i] != NULL ) {
outputTree( root ->children[i] , level +1);
}
}
}
}

void destroy( struct node * root ){
if( root != NULL){
for(int i = 0 ; i < 256 ; i ++){
if( root -> children [i] != NULL ) {
destroy( root ->children[i]);
}
}
delete root;
}
}


int main(){
char str[] = " Hello everybody! He is Steve. He is a boy.";
struct node * root = new node;
buildTree( root , str );
outputTree( root ,1 );
destroy(root);
return 1;
}