-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcommon_words.h
75 lines (63 loc) · 1.81 KB
/
common_words.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
/**
* @file common_words.h
* Header file of a class to find all the common words across multiple files
*
* @author Zach Widder
* @date Fall 2014
*/
#ifndef COMMON_WORDS_H
#define COMMON_WORDS_H
#include <string>
#include <map>
#include <vector>
using std::map;
using std::vector;
using std::string;
/**
 * CommonWords class. Finds the words that are common across multiple files.
 *
 * All standard-library types in this declaration are spelled with their
 * std:: qualification so the class does not depend on any using-declaration
 * being in effect at the point of inclusion.
 *
 * @author Zach Widder
 * @date October 2014
 */
class CommonWords
{
  public:
    /**
     * Constructs a CommonWords object from a vector of filenames.
     *
     * NOTE(review): this single-argument constructor is not `explicit`, so a
     * std::vector<std::string> converts implicitly to CommonWords. Left as is
     * to avoid breaking existing callers; consider marking it explicit.
     *
     * @param filenames The list of filenames to read.
     */
    CommonWords(const std::vector<std::string>& filenames);

    /**
     * @param n The number of times the word has to appear.
     * @return A vector of strings. The vector contains all words that appear
     *  in each file >= n times.
     */
    std::vector<std::string> get_common_words(unsigned int n) const;

  private:
    /**
     * #file_word_maps holds one map per file. Each map associates a word in
     * that file with the number of times it has been seen in that file.
     */
    std::vector<std::map<std::string, unsigned int>> file_word_maps;

    /**
     * #common maps a word to the number of documents that word appears in.
     */
    std::map<std::string, unsigned int> common;

    /* helper functions */

    /**
     * Initializes #file_word_maps.
     * @param filenames The vector of names of the files that will be used.
     */
    void init_file_word_maps(const std::vector<std::string>& filenames);

    /**
     * Initializes #common.
     */
    void init_common();

    /**
     * Takes a filename and transforms it to a vector of all words in that
     * file.
     * @param filename The name of the file that will fill the vector.
     * @return A vector holding the words of the named file.
     */
    std::vector<std::string> file_to_vector(const std::string& filename) const;
};
#endif