diff --git a/.jbang/JabKitLauncher.java b/.jbang/JabKitLauncher.java index 483f3e5060d..2849fe4f8f9 100644 --- a/.jbang/JabKitLauncher.java +++ b/.jbang/JabKitLauncher.java @@ -15,6 +15,7 @@ //SOURCES ../jabkit/src/main/java/org/jabref/cli/Pdf.java //SOURCES ../jabkit/src/main/java/org/jabref/cli/PdfUpdate.java //SOURCES ../jabkit/src/main/java/org/jabref/cli/Preferences.java +//SOURCES ../jabkit/src/main/java/org/jabref/cli/Pseudonymize.java //SOURCES ../jabkit/src/main/java/org/jabref/cli/Search.java //SOURCES ../jabkit/src/main/java/org/jabref/JabKit.java //FILES tinylog.properties=../jabkit/src/main/resources/tinylog.properties diff --git a/CHANGELOG.md b/CHANGELOG.md index dcfa647dc25..ee0ac9df3f4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv - We added an "Open example library" button to Welcome Tab. [#13014](https://github.com/JabRef/jabref/issues/13014) - We added automatic detection and selection of the identifier type (e.g., DOI, ISBN, arXiv) based on clipboard content when opening the "New Entry" dialog [#13111](https://github.com/JabRef/jabref/pull/13111) - We added support for import of a Refer/BibIX file format. [#13069](https://github.com/JabRef/jabref/issues/13069) +- We added a new `jabkit` command `pseudonymize` to pseudonymize the library. [#13109](https://github.com/JabRef/jabref/issues/13109) - We added functionality to focus running instance when trying to start a second instance. 
[#13129](https://github.com/JabRef/jabref/issues/13129) ### Changed diff --git a/jabkit/build.gradle.kts b/jabkit/build.gradle.kts index 5dccab19a5f..577818e7a61 100644 --- a/jabkit/build.gradle.kts +++ b/jabkit/build.gradle.kts @@ -57,6 +57,8 @@ dependencies { implementation("org.apache.lucene:lucene-queryparser:${luceneVersion}") + implementation("io.github.adr:e-adr:2.0.0-SNAPSHOT") + testImplementation(project(":test-support")) testImplementation("org.mockito:mockito-core:5.18.0") { exclude(group = "net.bytebuddy", module = "byte-buddy") diff --git a/jabkit/src/main/java/module-info.java b/jabkit/src/main/java/module-info.java index 58fdeddc2ae..68a32d97644 100644 --- a/jabkit/src/main/java/module-info.java +++ b/jabkit/src/main/java/module-info.java @@ -20,5 +20,10 @@ requires org.tinylog.api; requires org.tinylog.api.slf4j; requires org.tinylog.impl; + requires java.xml; + + // region: other libraries (alphabetically) + requires io.github.adr; + // endregion } diff --git a/jabkit/src/main/java/org/jabref/cli/ArgumentProcessor.java b/jabkit/src/main/java/org/jabref/cli/ArgumentProcessor.java index 040b06f6237..74fc19fe677 100644 --- a/jabkit/src/main/java/org/jabref/cli/ArgumentProcessor.java +++ b/jabkit/src/main/java/org/jabref/cli/ArgumentProcessor.java @@ -24,6 +24,7 @@ import org.jabref.logic.os.OS; import org.jabref.logic.preferences.CliPreferences; import org.jabref.logic.util.BuildInfo; +import org.jabref.logic.util.io.FileUtil; import org.jabref.model.database.BibDatabase; import org.jabref.model.database.BibDatabaseContext; import org.jabref.model.entry.BibEntryTypesManager; @@ -38,16 +39,18 @@ @Command(name = "jabkit", mixinStandardHelpOptions = true, + // sorted alphabetically subcommands = { - GenerateCitationKeys.class, CheckConsistency.class, // CheckIntegrity.class, - Fetch.class, - Search.class, Convert.class, + Fetch.class, GenerateBibFromAux.class, + GenerateCitationKeys.class, + Pdf.class, Preferences.class, - Pdf.class + 
Pseudonymize.class, + Search.class }) public class ArgumentProcessor implements Runnable { private static final Logger LOGGER = LoggerFactory.getLogger(ArgumentProcessor.class); @@ -150,8 +153,17 @@ protected static void saveDatabase(CliPreferences cliPreferences, BibEntryTypesManager entryTypesManager, BibDatabase newBase, Path outputFile) { + saveDatabaseContext(cliPreferences, entryTypesManager, new BibDatabaseContext(newBase), outputFile); + } + + protected static void saveDatabaseContext(CliPreferences cliPreferences, + BibEntryTypesManager entryTypesManager, + BibDatabaseContext bibDatabaseContext, + Path outputFile) { try { - System.out.println(Localization.lang("Saving") + ": " + outputFile); + if (!FileUtil.isBibFile(outputFile)) { + System.err.println(Localization.lang("Invalid output file type provided.")); + } try (AtomicFileWriter fileWriter = new AtomicFileWriter(outputFile, StandardCharsets.UTF_8)) { BibWriter bibWriter = new BibWriter(fileWriter, OS.NEWLINE); SelfContainedSaveConfiguration saveConfiguration = (SelfContainedSaveConfiguration) new SelfContainedSaveConfiguration() @@ -162,13 +174,14 @@ protected static void saveDatabase(CliPreferences cliPreferences, cliPreferences.getFieldPreferences(), cliPreferences.getCitationKeyPatternPreferences(), entryTypesManager); - databaseWriter.saveDatabase(new BibDatabaseContext(newBase)); + databaseWriter.saveDatabase(bibDatabaseContext); // Show just a warning message if encoding did not work for all characters: if (fileWriter.hasEncodingProblems()) { System.err.println(Localization.lang("Warning") + ": " + Localization.lang("UTF-8 could not be used to encode the following characters: %0", fileWriter.getEncodingProblems())); } + System.out.println(Localization.lang("Saved %0.", outputFile)); } } catch (IOException ex) { System.err.println(Localization.lang("Could not save file.") + "\n" + ex.getLocalizedMessage()); diff --git a/jabkit/src/main/java/org/jabref/cli/Pseudonymize.java 
b/jabkit/src/main/java/org/jabref/cli/Pseudonymize.java
new file mode 100644
index 00000000000..22146798821
--- /dev/null
+++ b/jabkit/src/main/java/org/jabref/cli/Pseudonymize.java
@@ -0,0 +1,145 @@
+package org.jabref.cli;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Optional;
+
+import org.jabref.logic.importer.ParserResult;
+import org.jabref.logic.l10n.Localization;
+import org.jabref.logic.pseudonymization.Pseudonymization;
+import org.jabref.logic.pseudonymization.PseudonymizationResultCsvWriter;
+import org.jabref.logic.util.io.FileUtil;
+import org.jabref.model.database.BibDatabaseContext;
+
+import io.github.adr.linked.ADR;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import picocli.CommandLine.Command;
+import picocli.CommandLine.Mixin;
+import picocli.CommandLine.Option;
+import picocli.CommandLine.ParentCommand;
+
+/**
+ * {@code jabkit pseudonymize}: reads a BibTeX library, pseudonymizes it, and writes the
+ * pseudonymized library plus a CSV file with the value mapping.
+ * Unless {@code --output} / {@code --key} are given, both files are placed next to the input
+ * file, named after its base name with the suffixes {@code .pseudo.bib} and {@code .pseudo.csv}.
+ */
+@Command(name = "pseudonymize", description = "Perform pseudonymization of the library")
+public class Pseudonymize implements Runnable {
+    private static final Logger LOGGER = LoggerFactory.getLogger(Pseudonymize.class);
+    private static final String PSEUDO_SUFFIX = ".pseudo";
+    private static final String BIB_EXTENSION = ".bib";
+    private static final String CSV_EXTENSION = ".csv";
+
+    @ParentCommand
+    private ArgumentProcessor argumentProcessor;
+
+    @Mixin
+    private ArgumentProcessor.SharedOptions sharedOptions = new ArgumentProcessor.SharedOptions();
+
+    @ADR(45)
+    @Option(names = {"--input"}, description = "BibTeX file to be pseudonymized", required = true)
+    private String inputFile;
+
+    @Option(names = {"--output"}, description = "Output pseudo-bib file")
+    private String outputFile;
+
+    @Option(names = {"--key"}, description = "Output pseudo-keys file")
+    private String keyFile;
+
+    @Option(names = {"-f", "--force"}, description = "Overwrite output file(s) if any exist(s)")
+    private boolean force;
+
+    /**
+     * Imports the input library, pseudonymizes it, and writes both output files.
+     * Returns without writing anything if the input cannot be opened or parsed, or if an
+     * output file already exists and {@code --force} was not given.
+     */
+    @Override
+    public void run() {
+        Path inputPath = Path.of(inputFile);
+        String fileName = FileUtil.getBaseName(inputFile);
+        Path pseudoBibPath = resolveOutputPath(outputFile, inputPath, fileName + PSEUDO_SUFFIX + BIB_EXTENSION);
+        Path pseudoKeyPath = resolveOutputPath(keyFile, inputPath, fileName + PSEUDO_SUFFIX + CSV_EXTENSION);
+
+        Optional<ParserResult> parserResult = ArgumentProcessor.importFile(
+                inputFile,
+                "bibtex",
+                argumentProcessor.cliPreferences,
+                sharedOptions.porcelain);
+
+        if (parserResult.isEmpty()) {
+            System.out.println(Localization.lang("Unable to open file '%0'.", inputFile));
+            return;
+        }
+
+        ParserResult parsedLibrary = parserResult.get();
+        if (parsedLibrary.isInvalid()) {
+            System.out.println(Localization.lang("Input file '%0' is invalid and could not be parsed.", inputFile));
+            return;
+        }
+
+        // Check both targets before writing anything, so that a refused overwrite of the key
+        // file cannot leave a pseudonymized library on disk without its mapping. Both checks
+        // are evaluated (no short-circuit) so the user gets a message for every existing file.
+        boolean bibWritable = fileOverwriteCheck(pseudoBibPath);
+        boolean keyWritable = fileOverwriteCheck(pseudoKeyPath);
+        if (!bibWritable || !keyWritable) {
+            return;
+        }
+
+        System.out.println(Localization.lang("Pseudonymizing library '%0'...", fileName));
+        Pseudonymization pseudonymization = new Pseudonymization();
+        BibDatabaseContext databaseContext = parsedLibrary.getDatabaseContext();
+        Pseudonymization.Result result = pseudonymization.pseudonymizeLibrary(databaseContext);
+
+        ArgumentProcessor.saveDatabaseContext(
+                argumentProcessor.cliPreferences,
+                argumentProcessor.entryTypesManager,
+                result.bibDatabaseContext(),
+                pseudoBibPath);
+
+        try {
+            PseudonymizationResultCsvWriter.writeValuesMappingAsCsv(pseudoKeyPath, result);
+            System.out.println(Localization.lang("Saved %0.", pseudoKeyPath));
+        } catch (IOException ex) {
+            LOGGER.error("Unable to save keys for pseudonymized library", ex);
+        }
+    }
+
+    /**
+     * Returns the path to write to: {@code customPath} if the user supplied one, otherwise
+     * {@code defaultFileName} placed next to the input file.
+     */
+    private Path resolveOutputPath(String customPath, Path inputPath, String defaultFileName) {
+        if (customPath != null) {
+            return Path.of(customPath);
+        }
+        // resolveSibling tolerates an input path without a parent (e.g. "--input library.bib"),
+        // where getParent() returns null and getParent().resolve(...) would throw a NullPointerException.
+        return inputPath.resolveSibling(defaultFileName);
+    }
+
+    /**
+     * Decides whether {@code filePath} may be written, printing a message if it already exists.
+     *
+     * @return {@code true} if the file does not exist or {@code --force} was given
+     */
+    private boolean fileOverwriteCheck(Path filePath) {
+        if (!Files.exists(filePath)) {
+            return true;
+        }
+
+        String fileName = filePath.getFileName().toString();
+
+        if (!force) {
+            System.out.println(Localization.lang("File '%0' already exists. Use -f or --force to overwrite.", fileName));
+            return false;
+        }
+
+        System.out.println(Localization.lang("File '%0' already exists. Overwriting.", fileName));
+        return true;
+    }
+}
diff --git a/jablib/src/main/java/module-info.java b/jablib/src/main/java/module-info.java
index 5c7b240c32f..a792ae43f7d 100644
--- a/jablib/src/main/java/module-info.java
+++ b/jablib/src/main/java/module-info.java
@@ -103,6 +103,7 @@
     exports org.jabref.logic.shared.event;
     exports org.jabref.logic.crawler;
     exports org.jabref.logic.git;
+    exports org.jabref.logic.pseudonymization;
 
     requires java.base;
 
diff --git a/jablib/src/main/resources/l10n/JabRef_en.properties b/jablib/src/main/resources/l10n/JabRef_en.properties
index 4db28bf7487..dc4b6bd248f 100644
--- a/jablib/src/main/resources/l10n/JabRef_en.properties
+++ b/jablib/src/main/resources/l10n/JabRef_en.properties
@@ -2946,3 +2946,8 @@ The\ following\ providers\ are\ available\:=The following providers are availabl
 Unable\ to\ open\ file\ '%0'.=Unable to open file '%0'.
 Unknown\ export\ format\ '%0'.=Unknown export format '%0'.
 Updating\ PDF\ metadata.=Updating PDF metadata.
+File\ '%0'\ already\ exists.\ Overwriting.=File '%0' already exists. Overwriting.
+File\ '%0'\ already\ exists.\ Use\ -f\ or\ --force\ to\ overwrite.=File '%0' already exists. Use -f or --force to overwrite.
+Pseudonymizing\ library\ '%0'...=Pseudonymizing library '%0'...
+Invalid\ output\ file\ type\ provided.=Invalid output file type provided.
+Saved\ %0.=Saved %0.