-
Notifications
You must be signed in to change notification settings - Fork 0
/
common_words.cpp
116 lines (104 loc) · 3 KB
/
common_words.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
/**
* @file common_words.cpp
* Implementation of the CommonWords class.
*
* @author Zach Widder
* @date Fall 2014
*/
#include "common_words.h"

#include <algorithm>
#include <cctype>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <utility>
#include <vector>

using std::string;
using std::vector;
using std::ifstream;
using std::cout;
using std::endl;
using std::feof;
/**
 * Returns a copy of a string with every punctuation character removed.
 * @param str The string to strip.
 * @return The characters of str, in their original order, for which
 *  std::ispunct reports false.
 */
std::string remove_punct(const std::string& str)
{
    std::string ret;
    ret.reserve(str.size());
    // std::ispunct has undefined behaviour for negative arguments other than
    // EOF, so each char is converted to unsigned char (via the lambda's
    // parameter type) before classification.
    std::remove_copy_if(str.begin(), str.end(), std::back_inserter(ret),
                        [](unsigned char c) { return std::ispunct(c) != 0; });
    return ret;
}
/**
 * Builds the per-file word counts for the given files, then derives the
 * common-word table from them.
 * @param filenames The files to read.
 */
CommonWords::CommonWords(const vector<string>& filenames)
{
    // init_common() reads file_word_maps, so the per-file maps are
    // populated first
    this->init_file_word_maps(filenames);
    this->init_common();
}
/**
 * Fills file_word_maps with one word-frequency map per input file.
 * After the call, file_word_maps[i] maps each word produced by
 * file_to_vector(filenames[i]) to the number of times it occurred.
 * @param filenames The files to read.
 */
void CommonWords::init_file_word_maps(const vector<string>& filenames)
{
    // make the length of file_word_maps the same as the length of filenames
    file_word_maps.resize(filenames.size());

    // go through all files
    for (size_t i = 0; i < filenames.size(); i++) {
        // get the corresponding vector of words that represents the current file
        const vector<string> words = file_to_vector(filenames[i]);

        map<string, unsigned int> counts;
        for (const string& word : words) {
            // operator[] value-initializes a missing count to 0, so a single
            // lookup both inserts new words and bumps existing ones
            ++counts[word];
        }
        // hand the finished map over without copying it
        file_word_maps[i] = std::move(counts);
    }
}
/**
 * Fills common with every word that occurs in all of the files, mapped to
 * the smallest number of times it occurs in any single file.
 *
 * Only words of the first file are candidates, since a word missing from
 * the first file cannot be present in every file.
 */
void CommonWords::init_common()
{
    // with no input files there is nothing to intersect (and no
    // file_word_maps[0] to read)
    if (file_word_maps.empty())
        return;

    for (const auto& entry : file_word_maps[0]) {
        const string& word = entry.first;
        // start from the first file's own count so that it takes part in
        // the minimum; a single-file input then reports its real counts
        unsigned int min_count = entry.second;
        bool in_every_file = true;

        for (size_t i = 1; i < file_word_maps.size(); i++) {
            const auto found = file_word_maps[i].find(word);
            if (found == file_word_maps[i].end()) {
                // absent from one file: not a common word
                in_every_file = false;
                break;
            }
            if (found->second < min_count)
                min_count = found->second;
        }

        if (in_every_file)
            common.insert(std::pair<string, unsigned int>(word, min_count));
    }
}
/**
 * @param n The number of times a word has to appear.
 * @return A vector of strings. The vector contains all words that appear
 *  in each file >= n times.
 */
vector<string> CommonWords::get_common_words(unsigned int n) const
{
    vector<string> out;
    // common holds one (word, count) entry per word; keep the words whose
    // stored count reaches the threshold. Entries are bound by reference so
    // no pair is copied while scanning.
    for (const auto& entry : common) {
        if (entry.second >= n)
            out.push_back(entry.first);
    }
    return out;
}
/**
 * Takes a filename and transforms it to a vector of all words in that file.
 * Words are the whitespace-separated tokens of the file, each passed
 * through remove_punct (a token made up solely of punctuation therefore
 * becomes an empty string).
 * @param filename The name of the file that will fill the vector
 * @return The words of the file in reading order; empty if the file could
 *  not be opened.
 */
vector<string> CommonWords::file_to_vector(const string& filename) const
{
    ifstream words(filename);
    vector<string> out;
    if (words.is_open()) {
        // Iterate until the iterator itself reaches end-of-stream. Testing
        // words.eof() instead would drop the final word whenever the file
        // does not end in whitespace, because reading that word already
        // sets eofbit.
        const std::istream_iterator<string> end_of_words;
        for (std::istream_iterator<string> word_iter(words);
             word_iter != end_of_words; ++word_iter) {
            out.push_back(remove_punct(*word_iter));
        }
    }
    return out;
}