Skip to content

Add Pseudonymization to CLI #13158

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Jun 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .jbang/JabKitLauncher.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
//SOURCES ../jabkit/src/main/java/org/jabref/cli/Pdf.java
//SOURCES ../jabkit/src/main/java/org/jabref/cli/PdfUpdate.java
//SOURCES ../jabkit/src/main/java/org/jabref/cli/Preferences.java
//SOURCES ../jabkit/src/main/java/org/jabref/cli/Pseudonymize.java
//SOURCES ../jabkit/src/main/java/org/jabref/cli/Search.java
//SOURCES ../jabkit/src/main/java/org/jabref/JabKit.java
//FILES tinylog.properties=../jabkit/src/main/resources/tinylog.properties
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv
- We added an "Open example library" button to the Welcome Tab. [#13014](https://github.com/JabRef/jabref/issues/13014)
- We added automatic detection and selection of the identifier type (e.g., DOI, ISBN, arXiv) based on clipboard content when opening the "New Entry" dialog [#13111](https://github.com/JabRef/jabref/pull/13111)
- We added support for import of a Refer/BibIX file format. [#13069](https://github.com/JabRef/jabref/issues/13069)
- We added a new `jabkit` command `pseudonymize` to pseudonymize the library. [#13109](https://github.com/JabRef/jabref/issues/13109)
- We added functionality to focus the running instance when trying to start a second instance. [#13129](https://github.com/JabRef/jabref/issues/13129)

### Changed
Expand Down
2 changes: 2 additions & 0 deletions jabkit/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ dependencies {

implementation("org.apache.lucene:lucene-queryparser:${luceneVersion}")

implementation("io.github.adr:e-adr:2.0.0-SNAPSHOT")

testImplementation(project(":test-support"))
testImplementation("org.mockito:mockito-core:5.18.0") {
exclude(group = "net.bytebuddy", module = "byte-buddy")
Expand Down
5 changes: 5 additions & 0 deletions jabkit/src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,10 @@
requires org.tinylog.api;
requires org.tinylog.api.slf4j;
requires org.tinylog.impl;

requires java.xml;

// region: other libraries (alphabetically)
requires io.github.adr;
// endregion
}
25 changes: 19 additions & 6 deletions jabkit/src/main/java/org/jabref/cli/ArgumentProcessor.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.jabref.logic.os.OS;
import org.jabref.logic.preferences.CliPreferences;
import org.jabref.logic.util.BuildInfo;
import org.jabref.logic.util.io.FileUtil;
import org.jabref.model.database.BibDatabase;
import org.jabref.model.database.BibDatabaseContext;
import org.jabref.model.entry.BibEntryTypesManager;
Expand All @@ -38,16 +39,18 @@

@Command(name = "jabkit",
mixinStandardHelpOptions = true,
// sorted alphabetically
subcommands = {
GenerateCitationKeys.class,
CheckConsistency.class,
// CheckIntegrity.class,
Fetch.class,
Search.class,
Convert.class,
Fetch.class,
GenerateBibFromAux.class,
GenerateCitationKeys.class,
Pdf.class,
Preferences.class,
Pdf.class
Pseudonymize.class,
Search.class
})
public class ArgumentProcessor implements Runnable {
private static final Logger LOGGER = LoggerFactory.getLogger(ArgumentProcessor.class);
Expand Down Expand Up @@ -150,8 +153,17 @@ protected static void saveDatabase(CliPreferences cliPreferences,
BibEntryTypesManager entryTypesManager,
BibDatabase newBase,
Path outputFile) {
saveDatabaseContext(cliPreferences, entryTypesManager, new BibDatabaseContext(newBase), outputFile);
}

protected static void saveDatabaseContext(CliPreferences cliPreferences,
BibEntryTypesManager entryTypesManager,
BibDatabaseContext bibDatabaseContext,
Path outputFile) {
try {
System.out.println(Localization.lang("Saving") + ": " + outputFile);
if (!FileUtil.isBibFile(outputFile)) {
System.err.println(Localization.lang("Invalid output file type provided."));
}
try (AtomicFileWriter fileWriter = new AtomicFileWriter(outputFile, StandardCharsets.UTF_8)) {
BibWriter bibWriter = new BibWriter(fileWriter, OS.NEWLINE);
SelfContainedSaveConfiguration saveConfiguration = (SelfContainedSaveConfiguration) new SelfContainedSaveConfiguration()
Expand All @@ -162,13 +174,14 @@ protected static void saveDatabase(CliPreferences cliPreferences,
cliPreferences.getFieldPreferences(),
cliPreferences.getCitationKeyPatternPreferences(),
entryTypesManager);
databaseWriter.saveDatabase(new BibDatabaseContext(newBase));
databaseWriter.saveDatabase(bibDatabaseContext);

// Show just a warning message if encoding did not work for all characters:
if (fileWriter.hasEncodingProblems()) {
System.err.println(Localization.lang("Warning") + ": "
+ Localization.lang("UTF-8 could not be used to encode the following characters: %0", fileWriter.getEncodingProblems()));
}
System.out.println(Localization.lang("Saved %0.", outputFile));
}
} catch (IOException ex) {
System.err.println(Localization.lang("Could not save file.") + "\n" + ex.getLocalizedMessage());
Expand Down
118 changes: 118 additions & 0 deletions jabkit/src/main/java/org/jabref/cli/Pseudonymize.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
package org.jabref.cli;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Optional;

import org.jabref.logic.importer.ParserResult;
import org.jabref.logic.l10n.Localization;
import org.jabref.logic.pseudonymization.Pseudonymization;
import org.jabref.logic.pseudonymization.PseudonymizationResultCsvWriter;
import org.jabref.logic.util.io.FileUtil;
import org.jabref.model.database.BibDatabaseContext;

import io.github.adr.linked.ADR;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import picocli.CommandLine.Command;
import picocli.CommandLine.Mixin;
import picocli.CommandLine.Option;
import picocli.CommandLine.ParentCommand;

/**
 * {@code jabkit pseudonymize} sub-command.
 * <p>
 * Imports the library given by {@code --input} as BibTeX, runs {@link Pseudonymization} on it and
 * writes two files: the pseudonymized library ({@code --output}) and the values mapping as CSV
 * ({@code --key}). When an output option is omitted, the file is placed next to the input file and
 * named {@code <input base name>.pseudo.bib} or {@code <input base name>.pseudo.csv} respectively.
 * <p>
 * Existing output files are only overwritten when {@code -f}/{@code --force} is given. Both targets
 * are checked before anything is written, so a refused overwrite never leaves a pseudonymized
 * library without its mapping file.
 */
@Command(name = "pseudonymize", description = "Perform pseudonymization of the library")
public class Pseudonymize implements Runnable {
    private static final Logger LOGGER = LoggerFactory.getLogger(Pseudonymize.class);
    private static final String PSEUDO_SUFFIX = ".pseudo";
    private static final String BIB_EXTENSION = ".bib";
    private static final String CSV_EXTENSION = ".csv";

    @ParentCommand
    private ArgumentProcessor argumentProcessor;

    @Mixin
    private ArgumentProcessor.SharedOptions sharedOptions = new ArgumentProcessor.SharedOptions();

    @ADR(45)
    @Option(names = {"--input"}, description = "BibTeX file to be pseudonymized", required = true)
    private String inputFile;

    @Option(names = {"--output"}, description = "Output pseudo-bib file")
    private String outputFile;

    @Option(names = {"--key"}, description = "Output pseudo-keys file")
    private String keyFile;

    @Option(names = {"-f", "--force"}, description = "Overwrite output file(s) if any exist(s)")
    private boolean force;

    /**
     * Executes the command: import, validate, check output targets, pseudonymize, save library, save mapping.
     * Problems are reported on the console and end the command early; no exception is propagated.
     */
    @Override
    public void run() {
        Path inputPath = Path.of(inputFile);
        String fileName = FileUtil.getBaseName(inputFile);
        Path pseudoBibPath = resolveOutputPath(outputFile, inputPath, fileName + PSEUDO_SUFFIX + BIB_EXTENSION);
        Path pseudoKeyPath = resolveOutputPath(keyFile, inputPath, fileName + PSEUDO_SUFFIX + CSV_EXTENSION);

        Optional<ParserResult> parserResult = ArgumentProcessor.importFile(
                inputFile,
                "bibtex",
                argumentProcessor.cliPreferences,
                sharedOptions.porcelain);

        if (parserResult.isEmpty()) {
            System.out.println(Localization.lang("Unable to open file '%0'.", inputFile));
            return;
        }

        ParserResult parsedLibrary = parserResult.get();
        if (parsedLibrary.isInvalid()) {
            System.out.println(Localization.lang("Input file '%0' is invalid and could not be parsed.", inputFile));
            return;
        }

        // Check BOTH targets before writing anything. Evaluated without short-circuiting so that
        // the user is told about every conflicting file in a single run.
        boolean bibTargetUsable = fileOverwriteCheck(pseudoBibPath);
        boolean keyTargetUsable = fileOverwriteCheck(pseudoKeyPath);
        if (!bibTargetUsable || !keyTargetUsable) {
            return;
        }

        System.out.println(Localization.lang("Pseudonymizing library '%0'...", fileName));
        Pseudonymization pseudonymization = new Pseudonymization();
        BibDatabaseContext databaseContext = parsedLibrary.getDatabaseContext();
        Pseudonymization.Result result = pseudonymization.pseudonymizeLibrary(databaseContext);

        ArgumentProcessor.saveDatabaseContext(
                argumentProcessor.cliPreferences,
                argumentProcessor.entryTypesManager,
                result.bibDatabaseContext(),
                pseudoBibPath);

        try {
            PseudonymizationResultCsvWriter.writeValuesMappingAsCsv(pseudoKeyPath, result);
            System.out.println(Localization.lang("Saved %0.", pseudoKeyPath));
        } catch (IOException ex) {
            LOGGER.error("Unable to save keys for pseudonymized library", ex);
        }
    }

    /**
     * Determines where an output file is written.
     *
     * @param customPath      path given on the command line, or {@code null} if the option was omitted
     * @param inputPath       path of the input library
     * @param defaultFileName file name to use next to the input file when {@code customPath} is {@code null}
     * @return {@code customPath} as a path if given, otherwise {@code defaultFileName} in the directory of {@code inputPath}
     */
    private Path resolveOutputPath(String customPath, Path inputPath, String defaultFileName) {
        if (customPath != null) {
            return Path.of(customPath);
        }
        // resolveSibling (instead of getParent().resolve(...)) also works for a bare relative
        // file name such as "library.bib", where getParent() returns null.
        return inputPath.resolveSibling(defaultFileName);
    }

    /**
     * Decides whether {@code filePath} may be written and informs the user on the console.
     *
     * @param filePath output file about to be written
     * @return {@code true} if the file does not exist or {@code --force} was given; {@code false} otherwise
     */
    private boolean fileOverwriteCheck(Path filePath) {
        if (!Files.exists(filePath)) {
            return true;
        }

        // getFileName() is null for paths without name elements (e.g., a root); show the full path then.
        Path namePart = filePath.getFileName();
        String fileName = (namePart != null ? namePart : filePath).toString();

        if (!force) {
            System.out.println(Localization.lang("File '%0' already exists. Use -f or --force to overwrite.", fileName));
            return false;
        }

        System.out.println(Localization.lang("File '%0' already exists. Overwriting.", fileName));
        return true;
    }
}
1 change: 1 addition & 0 deletions jablib/src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@
exports org.jabref.logic.shared.event;
exports org.jabref.logic.crawler;
exports org.jabref.logic.git;
exports org.jabref.logic.pseudonymization;

requires java.base;

Expand Down
5 changes: 5 additions & 0 deletions jablib/src/main/resources/l10n/JabRef_en.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2946,3 +2946,8 @@ The\ following\ providers\ are\ available\:=The following providers are availabl
Unable\ to\ open\ file\ '%0'.=Unable to open file '%0'.
Unknown\ export\ format\ '%0'.=Unknown export format '%0'.
Updating\ PDF\ metadata.=Updating PDF metadata.
File\ '%0'\ already\ exists.\ Overwriting.=File '%0' already exists. Overwriting.
File\ '%0'\ already\ exists.\ Use\ -f\ or\ --force\ to\ overwrite.=File '%0' already exists. Use -f or --force to overwrite.
Pseudonymizing\ library\ '%0'...=Pseudonymizing library '%0'...
Invalid\ output\ file\ type\ provided.=Invalid output file type provided.
Saved\ %0.=Saved %0.
Loading