Skip to content

Commit 7d9815c

Browse files
paudelritij and koppor authored
Add Pseudonymization to CLI (#13158)
* Add Pseudonymization to CLI - A new pseudonymize command has been created. - Test cases have been added. - The display of commands has been sorted for ease of use. - An entry has been added to CHANGELOG.md. * Reformat Pseudonymize command - Implement ADR 0045 - Add -f / --force flag to overwrite if the file exists - Reformat saving of database to save meta-data as well - Add methods from FileUtil - Reformat CHANGELOG.md entry - Add JabRef_en.properties entry - A comment added - Remove duplicate file Chocolate.bib - Improve logger and localization lang - Remove trivial test case - Rename all occurrences of the word anon. to pseudo * Remove exclamation (!) sign * Rename BibTex to BibTeX * Refactor localization messages and update ADR annotation * Add implementation dependency for ADR * Update JabRef_en.properties * Improve output file path handling and consolidate file existence check - Default output file path is now the same as input file location if not specified. - Created a single method to check for file existence. * Add new class to JabKitLauncher * Enhance CLI output messages * Refactor methods to display success message after successful save --------- Co-authored-by: Oliver Kopp <[email protected]>
1 parent 33522ea commit 7d9815c

File tree

8 files changed

+152
-6
lines changed

8 files changed

+152
-6
lines changed

.jbang/JabKitLauncher.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
//SOURCES ../jabkit/src/main/java/org/jabref/cli/Pdf.java
1616
//SOURCES ../jabkit/src/main/java/org/jabref/cli/PdfUpdate.java
1717
//SOURCES ../jabkit/src/main/java/org/jabref/cli/Preferences.java
18+
//SOURCES ../jabkit/src/main/java/org/jabref/cli/Pseudonymize.java
1819
//SOURCES ../jabkit/src/main/java/org/jabref/cli/Search.java
1920
//SOURCES ../jabkit/src/main/java/org/jabref/JabKit.java
2021
//FILES tinylog.properties=../jabkit/src/main/resources/tinylog.properties

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv
2424
- We added an "Open example library" button to Welcome Tab. [#13014](https://github.com/JabRef/jabref/issues/13014)
2525
- We added automatic detection and selection of the identifier type (e.g., DOI, ISBN, arXiv) based on clipboard content when opening the "New Entry" dialog [#13111](https://github.com/JabRef/jabref/pull/13111)
2626
- We added support for import of a Refer/BibIX file format. [#13069](https://github.com/JabRef/jabref/issues/13069)
27+
- We added a new `jabkit` command `pseudonymize` to pseudonymize the library. [#13109](https://github.com/JabRef/jabref/issues/13109)
2728
- We added functionality to focus running instance when trying to start a second instance. [#13129](https://github.com/JabRef/jabref/issues/13129)
2829

2930
### Changed

jabkit/build.gradle.kts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ dependencies {
5757

5858
implementation("org.apache.lucene:lucene-queryparser:${luceneVersion}")
5959

60+
implementation("io.github.adr:e-adr:2.0.0-SNAPSHOT")
61+
6062
testImplementation(project(":test-support"))
6163
testImplementation("org.mockito:mockito-core:5.18.0") {
6264
exclude(group = "net.bytebuddy", module = "byte-buddy")

jabkit/src/main/java/module-info.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,10 @@
2020
requires org.tinylog.api;
2121
requires org.tinylog.api.slf4j;
2222
requires org.tinylog.impl;
23+
2324
requires java.xml;
25+
26+
// region: other libraries (alphabetically)
27+
requires io.github.adr;
28+
// endregion
2429
}

jabkit/src/main/java/org/jabref/cli/ArgumentProcessor.java

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.jabref.logic.os.OS;
2525
import org.jabref.logic.preferences.CliPreferences;
2626
import org.jabref.logic.util.BuildInfo;
27+
import org.jabref.logic.util.io.FileUtil;
2728
import org.jabref.model.database.BibDatabase;
2829
import org.jabref.model.database.BibDatabaseContext;
2930
import org.jabref.model.entry.BibEntryTypesManager;
@@ -38,16 +39,18 @@
3839

3940
@Command(name = "jabkit",
4041
mixinStandardHelpOptions = true,
42+
// sorted alphabetically
4143
subcommands = {
42-
GenerateCitationKeys.class,
4344
CheckConsistency.class,
4445
// CheckIntegrity.class,
45-
Fetch.class,
46-
Search.class,
4746
Convert.class,
47+
Fetch.class,
4848
GenerateBibFromAux.class,
49+
GenerateCitationKeys.class,
50+
Pdf.class,
4951
Preferences.class,
50-
Pdf.class
52+
Pseudonymize.class,
53+
Search.class
5154
})
5255
public class ArgumentProcessor implements Runnable {
5356
private static final Logger LOGGER = LoggerFactory.getLogger(ArgumentProcessor.class);
@@ -150,8 +153,17 @@ protected static void saveDatabase(CliPreferences cliPreferences,
150153
BibEntryTypesManager entryTypesManager,
151154
BibDatabase newBase,
152155
Path outputFile) {
156+
saveDatabaseContext(cliPreferences, entryTypesManager, new BibDatabaseContext(newBase), outputFile);
157+
}
158+
159+
protected static void saveDatabaseContext(CliPreferences cliPreferences,
160+
BibEntryTypesManager entryTypesManager,
161+
BibDatabaseContext bibDatabaseContext,
162+
Path outputFile) {
153163
try {
154-
System.out.println(Localization.lang("Saving") + ": " + outputFile);
164+
if (!FileUtil.isBibFile(outputFile)) {
165+
System.err.println(Localization.lang("Invalid output file type provided."));
166+
}
155167
try (AtomicFileWriter fileWriter = new AtomicFileWriter(outputFile, StandardCharsets.UTF_8)) {
156168
BibWriter bibWriter = new BibWriter(fileWriter, OS.NEWLINE);
157169
SelfContainedSaveConfiguration saveConfiguration = (SelfContainedSaveConfiguration) new SelfContainedSaveConfiguration()
@@ -162,13 +174,14 @@ protected static void saveDatabase(CliPreferences cliPreferences,
162174
cliPreferences.getFieldPreferences(),
163175
cliPreferences.getCitationKeyPatternPreferences(),
164176
entryTypesManager);
165-
databaseWriter.saveDatabase(new BibDatabaseContext(newBase));
177+
databaseWriter.saveDatabase(bibDatabaseContext);
166178

167179
// Show just a warning message if encoding did not work for all characters:
168180
if (fileWriter.hasEncodingProblems()) {
169181
System.err.println(Localization.lang("Warning") + ": "
170182
+ Localization.lang("UTF-8 could not be used to encode the following characters: %0", fileWriter.getEncodingProblems()));
171183
}
184+
System.out.println(Localization.lang("Saved %0.", outputFile));
172185
}
173186
} catch (IOException ex) {
174187
System.err.println(Localization.lang("Could not save file.") + "\n" + ex.getLocalizedMessage());
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
package org.jabref.cli;
2+
3+
import java.io.IOException;
4+
import java.nio.file.Files;
5+
import java.nio.file.Path;
6+
import java.util.Optional;
7+
8+
import org.jabref.logic.importer.ParserResult;
9+
import org.jabref.logic.l10n.Localization;
10+
import org.jabref.logic.pseudonymization.Pseudonymization;
11+
import org.jabref.logic.pseudonymization.PseudonymizationResultCsvWriter;
12+
import org.jabref.logic.util.io.FileUtil;
13+
import org.jabref.model.database.BibDatabaseContext;
14+
15+
import io.github.adr.linked.ADR;
16+
import org.slf4j.Logger;
17+
import org.slf4j.LoggerFactory;
18+
import picocli.CommandLine.Command;
19+
import picocli.CommandLine.Mixin;
20+
import picocli.CommandLine.Option;
21+
import picocli.CommandLine.ParentCommand;
22+
23+
@Command(name = "pseudonymize", description = "Perform pseudonymization of the library")
24+
public class Pseudonymize implements Runnable {
25+
private final static Logger LOGGER = LoggerFactory.getLogger(Pseudonymize.class);
26+
private static final String PSEUDO_SUFFIX = ".pseudo";
27+
private static final String BIB_EXTENSION = ".bib";
28+
private static final String CSV_EXTENSION = ".csv";
29+
30+
@ParentCommand
31+
private ArgumentProcessor argumentProcessor;
32+
33+
@Mixin
34+
private ArgumentProcessor.SharedOptions sharedOptions = new ArgumentProcessor.SharedOptions();
35+
36+
@ADR(45)
37+
@Option(names = {"--input"}, description = "BibTeX file to be pseudonymized", required = true)
38+
private String inputFile;
39+
40+
@Option(names = {"--output"}, description = "Output pseudo-bib file")
41+
private String outputFile;
42+
43+
@Option(names = {"--key"}, description = "Output pseudo-keys file")
44+
private String keyFile;
45+
46+
@Option(names = {"-f", "--force"}, description = "Overwrite output file(s) if any exist(s)")
47+
private boolean force;
48+
49+
@Override
50+
public void run() {
51+
Path inputPath = Path.of(inputFile);
52+
String fileName = FileUtil.getBaseName(inputFile);
53+
Path pseudoBibPath = resolveOutputPath(outputFile, inputPath, fileName + PSEUDO_SUFFIX + BIB_EXTENSION);
54+
Path pseudoKeyPath = resolveOutputPath(keyFile, inputPath, fileName + PSEUDO_SUFFIX + CSV_EXTENSION);
55+
56+
Optional<ParserResult> parserResult = ArgumentProcessor.importFile(
57+
inputFile,
58+
"bibtex",
59+
argumentProcessor.cliPreferences,
60+
sharedOptions.porcelain);
61+
62+
if (parserResult.isEmpty()) {
63+
System.out.println(Localization.lang("Unable to open file '%0'.", inputFile));
64+
return;
65+
}
66+
67+
if (parserResult.get().isInvalid()) {
68+
System.out.println(Localization.lang("Input file '%0' is invalid and could not be parsed.", inputFile));
69+
return;
70+
}
71+
72+
System.out.println(Localization.lang("Pseudonymizing library '%0'...", fileName));
73+
Pseudonymization pseudonymization = new Pseudonymization();
74+
BibDatabaseContext databaseContext = parserResult.get().getDatabaseContext();
75+
Pseudonymization.Result result = pseudonymization.pseudonymizeLibrary(databaseContext);
76+
77+
if (!fileOverwriteCheck(pseudoBibPath)) {
78+
return;
79+
}
80+
81+
ArgumentProcessor.saveDatabaseContext(
82+
argumentProcessor.cliPreferences,
83+
argumentProcessor.entryTypesManager,
84+
result.bibDatabaseContext(),
85+
pseudoBibPath);
86+
87+
if (!fileOverwriteCheck(pseudoKeyPath)) {
88+
return;
89+
}
90+
91+
try {
92+
PseudonymizationResultCsvWriter.writeValuesMappingAsCsv(pseudoKeyPath, result);
93+
System.out.println(Localization.lang("Saved %0.", pseudoKeyPath));
94+
} catch (IOException ex) {
95+
LOGGER.error("Unable to save keys for pseudonymized library", ex);
96+
}
97+
}
98+
99+
private Path resolveOutputPath(String customPath, Path inputPath, String defaultFileName) {
100+
return customPath != null ? Path.of(customPath) : inputPath.getParent().resolve(defaultFileName);
101+
}
102+
103+
private boolean fileOverwriteCheck(Path filePath) {
104+
if (!Files.exists(filePath)) {
105+
return true;
106+
}
107+
108+
String fileName = filePath.getFileName().toString();
109+
110+
if (!force) {
111+
System.out.println(Localization.lang("File '%0' already exists. Use -f or --force to overwrite.", fileName));
112+
return false;
113+
}
114+
115+
System.out.println(Localization.lang("File '%0' already exists. Overwriting.", fileName));
116+
return true;
117+
}
118+
}

jablib/src/main/java/module-info.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@
103103
exports org.jabref.logic.shared.event;
104104
exports org.jabref.logic.crawler;
105105
exports org.jabref.logic.git;
106+
exports org.jabref.logic.pseudonymization;
106107

107108
requires java.base;
108109

jablib/src/main/resources/l10n/JabRef_en.properties

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2946,3 +2946,8 @@ The\ following\ providers\ are\ available\:=The following providers are availabl
29462946
Unable\ to\ open\ file\ '%0'.=Unable to open file '%0'.
29472947
Unknown\ export\ format\ '%0'.=Unknown export format '%0'.
29482948
Updating\ PDF\ metadata.=Updating PDF metadata.
2949+
File\ '%0'\ already\ exists.\ Overwriting.=File '%0' already exists. Overwriting.
2950+
File\ '%0'\ already\ exists.\ Use\ -f\ or\ --force\ to\ overwrite.=File '%0' already exists. Use -f or --force to overwrite.
2951+
Pseudonymizing\ library\ '%0'...=Pseudonymizing library '%0'...
2952+
Invalid\ output\ file\ type\ provided.=Invalid output file type provided.
2953+
Saved\ %0.=Saved %0.

0 commit comments

Comments
 (0)