|
7 | 7 | import java.util.ArrayList;
|
8 | 8 |
|
9 | 9 | public class Qgram {
|
10 |
| - |
11 |
| - public String comp; //comparison string |
12 |
| - public ArrayList<String> output = new ArrayList<String>(1); //output list |
13 |
| - |
14 |
| - //constructor: takes file, comparison wstring, and edit distance; calls compare and calls outputReturn |
15 |
| - public Qgram(int num, String comp, String[] file) { |
16 |
| - |
17 |
| - String[] word = this.twoGrams(this.usefulLetters(comp)); |
18 |
| - int i = 0; |
19 |
| - while (i < file.length) { |
20 |
| - String[] gr = this.twoGrams(this.usefulLetters(file[i]); |
21 |
| - if (this.compare(gr, word, file[i], num)) { |
22 |
| - output.add(file[i]); |
23 |
| - } |
24 |
| - i ++; |
25 |
| - } |
26 |
| - } |
27 |
| - |
28 |
| - catch (FileNotFoundException e) { |
29 |
| - } |
30 |
| - catch (IOException e) { |
31 |
| - } |
32 |
| - |
33 |
| - this.outputReturn(); |
34 |
| - } |
35 |
| - |
36 |
| - //takes out all spaces and special characters and makes everything lower case |
37 |
| - public String usefulLetters(String s) { |
38 |
| - |
39 |
| - StringBuilder sb = new StringBuilder(); |
40 |
| - |
41 |
| - for (int i = 0; i < s.length(); i ++) { |
42 |
| - if (s.charAt(i) >= 'a' && s.charAt(i) <= 'z') { |
43 |
| - sb.append(s.charAt(i)); |
44 |
| - } |
45 |
| - else if (s.charAt(i) >= 'A' && s.charAt(i) <= 'Z') { |
46 |
| - sb.append(Character.toLowerCase(s.charAt(i))); |
47 |
| - } |
48 |
| - } |
49 |
| - |
50 |
| - return sb.toString(); |
51 |
| - } |
52 |
| - |
53 |
| - //separates out into 2-grams |
54 |
| - public String[] twoGrams(String s) { |
55 |
| - |
56 |
| - String[] gramString = new String[s.length() - 1]; |
57 |
| - |
58 |
| - for (int i = 0; i < s.length() - 1; i ++) { |
59 |
| - String temp = new StringBuilder().append(s.charAt(i)).append(s.charAt(i + 1)).toString(); |
60 |
| - gramString[i] = temp; |
61 |
| - } |
62 |
| - |
63 |
| - return gramString; |
64 |
| - } |
65 |
| - |
66 |
| - //compares the 2-grams in the input string to the 2-grams in that particular string in the data set |
67 |
| - public boolean compare(String[] comp, String[] word, String s, int num) { |
68 |
| - |
69 |
| - boolean same = false; |
70 |
| - int ed = 0; |
71 |
| - int i = 0; |
72 |
| - while (i < word.length) { |
73 |
| - for (int j = 0; j < comp.length; j ++) { |
74 |
| - if (word[i].equals(comp[j])) { |
75 |
| - same = true; |
76 |
| - } |
77 |
| - } |
78 |
| - if (!same) { |
79 |
| - ed ++; |
80 |
| - } |
81 |
| - if (ed > num){ |
82 |
| - break; |
83 |
| - } |
84 |
| - |
85 |
| - i ++; |
86 |
| - } |
87 |
| - |
88 |
| - if (ed <= num) { |
89 |
| - return true; |
90 |
| - } |
91 |
| - |
92 |
| - else { |
93 |
| - return false; |
94 |
| - } |
95 |
| - |
96 |
| - } |
97 |
| - |
98 |
| - //returns the arraylist of possible strings |
99 |
| - public ArrayList<String> outputReturn() { |
100 |
| - return this.output; |
101 |
| - } |
| 10 | + |
| 11 | + public String comp; //comparison string |
| 12 | + public ArrayList<String> output = new ArrayList<String>(1); //output list |
| 13 | + |
| 14 | + //constructor: takes file, comparison wstring, and edit distance; calls compare and calls outputReturn |
| 15 | + public Qgram(int num, String comp, String[] file) { |
| 16 | + |
| 17 | + String[] word = this.twoGrams(this.usefulLetters(comp)); |
| 18 | + int i = 0; |
| 19 | + while (i < file.length) { |
| 20 | + String[] gr = this.twoGrams(this.usefulLetters(file[i]); |
| 21 | + if (this.compare(gr, word, file[i], num)) { |
| 22 | + output.add(file[i]); |
| 23 | + } |
| 24 | + i ++; |
| 25 | + } |
| 26 | + } |
| 27 | + |
| 28 | + catch (FileNotFoundException e) { |
| 29 | + } |
| 30 | + catch (IOException e) { |
| 31 | + } |
| 32 | + |
| 33 | + this.outputReturn(); |
| 34 | +} |
| 35 | + |
| 36 | +//takes out all spaces and special characters and makes everything lower case |
| 37 | +public String usefulLetters(String s) { |
| 38 | + |
| 39 | + StringBuilder sb = new StringBuilder(); |
| 40 | + |
| 41 | + for (int i = 0; i < s.length(); i ++) { |
| 42 | + if (s.charAt(i) >= 'a' && s.charAt(i) <= 'z') { |
| 43 | + sb.append(s.charAt(i)); |
| 44 | + } |
| 45 | + else if (s.charAt(i) >= 'A' && s.charAt(i) <= 'Z') { |
| 46 | + sb.append(Character.toLowerCase(s.charAt(i))); |
| 47 | + } |
| 48 | + } |
| 49 | + |
| 50 | + return sb.toString(); |
| 51 | +} |
| 52 | + |
| 53 | +//separates out into 2-grams |
| 54 | +public String[] twoGrams(String s) { |
| 55 | + |
| 56 | + String[] gramString = new String[s.length() - 1]; |
| 57 | + |
| 58 | + for (int i = 0; i < s.length() - 1; i ++) { |
| 59 | + String temp = new StringBuilder().append(s.charAt(i)).append(s.charAt(i + 1)).toString(); |
| 60 | + gramString[i] = temp; |
| 61 | + } |
| 62 | + |
| 63 | + return gramString; |
| 64 | +} |
| 65 | + |
| 66 | +//compares the 2-grams in the input string to the 2-grams in that particular string in the data set |
| 67 | +public boolean compare(String[] comp, String[] word, String s, int num) { |
| 68 | + |
| 69 | + boolean same = false; |
| 70 | + int ed = 0; |
| 71 | + int i = 0; |
| 72 | + while (i < word.length) { |
| 73 | + for (int j = 0; j < comp.length; j ++) { |
| 74 | + if (word[i].equals(comp[j])) { |
| 75 | + same = true; |
| 76 | + } |
| 77 | + } |
| 78 | + if (!same) { |
| 79 | + ed ++; |
| 80 | + } |
| 81 | + if (ed > num){ |
| 82 | + break; |
| 83 | + } |
| 84 | + |
| 85 | + i ++; |
| 86 | + } |
| 87 | + |
| 88 | + if (ed <= num) { |
| 89 | + return true; |
| 90 | + } |
| 91 | + |
| 92 | + else { |
| 93 | + return false; |
| 94 | + } |
| 95 | + |
| 96 | +} |
| 97 | + |
| 98 | +//returns the arraylist of possible strings |
| 99 | +public ArrayList<String> outputReturn() { |
| 100 | + return this.output; |
| 101 | +} |
102 | 102 |
|
103 | 103 | }
|
0 commit comments