Replies: 2 comments
-
I found this and adjusted it to my needs, but perhaps there is an easier way?
#!/usr/bin/env python3
import argparse
import os
import sys
import subprocess
import tempfile
import uuid
from chardet import detect
try:
import git_filter_repo as fr
except ImportError:
raise SystemExit("Error: Couldn't find git_filter_repo.py. Did you forget to make a symlink to git-filter-repo named git_filter_repo.py or did you forget to put the latter in your PYTHONPATH?")
# Shared state read by the commit callback. The scratch directory and the
# `git cat-file --batch` process are assigned further down, right before the
# filter is started; until then both are None.
tmpdir, cat_file_process = None, None
# Maps an original blob id to the id of the blob that replaced it.
blobs_handled = dict()
# Substrings of file(1)'s --mime-encoding output that mean "leave this blob
# alone", paired with the text appended to the log line for that case.
_SKIPPED_ENCODINGS = (
    ("utf-8", " skipping."),
    ("us-ascii", " no conversion needed. skipping."),
    ("unknown-8bit", " skipping."),
    ("binary", " skipping."),
)


def _read_blob(blob_id):
    """Return the raw bytes of blob_id using the shared `git cat-file --batch` process."""
    cat_file_process.stdin.write(blob_id + b'\n')
    cat_file_process.stdin.flush()
    _, _, objsize = cat_file_process.stdout.readline().split()
    # cat-file --batch terminates every object with one newline that is not
    # part of the object; read it to stay in sync, then drop it right away so
    # it is never fed to a decoder.
    return cat_file_process.stdout.read(int(objsize) + 1)[:-1]


def _guess_encoding(contents):
    """Return the encoding name reported by `file --mime-encoding` for contents."""
    # chardet did not detect the files correctly, but the Linux `file` command
    # did, so `file` is used for encoding detection. It needs a real file.
    path = os.path.join(tmpdir, os.fsencode(str(uuid.uuid4()) + ".java"))
    with open(path, "wb") as f:
        f.write(contents)
    try:
        result = subprocess.run(
            ['file', '--brief', '--mime-encoding', '--print0', os.fsdecode(path)],
            stdout=subprocess.PIPE,
            check=True,
        )
    finally:
        # Remove the scratch file even when `file` could not be run.
        os.remove(path)
    return result.stdout.decode('utf-8').strip()


def lint_with_real_filenames(commit, metadata):
    """Commit callback: re-encode every changed ``.java`` blob to UTF-8.

    For each file change of ``commit`` that is not a deletion and whose name
    ends in ``.java`` (case-insensitive), the blob is read, its encoding is
    guessed with ``file``, and - unless it is already UTF-8/ASCII or the
    encoding is unknown/binary - a UTF-8 re-encoded blob is inserted into the
    filter stream and the change is pointed at it.

    Any change (of any file name) whose blob was already converted earlier is
    simply re-pointed at the previously inserted blob.

    Args:
        commit: the commit being filtered; its ``file_changes`` are updated
            in place.
        metadata: unused; accepted because the callback is invoked with it.
    """
    for change in commit.file_changes:
        # Reuse earlier conversions first: this avoids reading and detecting
        # the same blob again and never leaves scratch files behind.
        if change.blob_id in blobs_handled:
            change.blob_id = blobs_handled[change.blob_id]
            continue
        if change.type == b'D':
            continue
        if not change.filename.lower().endswith(b".java"):
            continue

        contents = _read_blob(change.blob_id)
        encoding = _guess_encoding(contents)
        # File names are arbitrary bytes; never let the log line crash the run.
        shown_name = change.filename.decode("utf-8", errors="replace")

        skip_suffix = next(
            (suffix for marker, suffix in _SKIPPED_ENCODINGS if marker in encoding),
            None,
        )
        if skip_suffix is not None:
            print("encoding guess of java file: [", shown_name, "] was [", encoding, "]" + skip_suffix)
            continue
        print("encoding guess of java file: [", shown_name, "] was [", encoding, "]")

        try:
            converted = contents.decode(encoding).encode("utf-8")
        except (LookupError, UnicodeDecodeError) as err:
            # `file` may report a name Python has no codec for, or the guess
            # may be wrong; keep the blob untouched instead of aborting the
            # whole history rewrite.
            print("could not convert java file: [", shown_name, "] from [", encoding, "]:", err, "skipping.")
            continue

        # Insert the new blob into the filter's stream, record our handling
        # of the old blob, and use the new one for this change.
        blob = fr.Blob(converted)
        filter.insert(blob)
        blobs_handled[change.blob_id] = blob.id
        change.blob_id = blob.id
args = fr.FilteringOptions.default_options()
args.force = True

# Actually start the conversion. Both context managers guarantee cleanup even
# when the filter run fails: the scratch directory is deleted, and the
# cat-file process has its pipes closed and is waited for.
with tempfile.TemporaryDirectory() as scratch_dir:
    # The callback joins this with bytes file names, so keep it as bytes.
    tmpdir = os.fsencode(scratch_dir)
    with subprocess.Popen(['git', 'cat-file', '--batch'],
                          stdin=subprocess.PIPE,
                          stdout=subprocess.PIPE) as cat_file_process:
        # NOTE: the name `filter` shadows the builtin, but the commit callback
        # looks the RepoFilter up under exactly this module-level name.
        filter = fr.RepoFilter(args, commit_callback=lint_with_real_filenames)
        filter.run()
Beta Was this translation helpful? Give feedback.
-
The example you found was essentially a copy of contrib/filter-repo-demos/lint-history in this repository, with a few small tweaks. I suspect it pre-dated the --relevant option of lint-history. If you made a simple script that took a filename as a parameter and converted that file to UTF-8, you could run it through lint-history, using --relevant to restrict it to the *.java files [the example command is missing from this copy of the page]. But copying the contrib/filter-repo-demos/lint-history file and tweaking it to suit your needs (as you indirectly did via copying that other example you found) works too.
Beta Was this translation helpful? Give feedback.
Uh oh!
There was an error while loading. Please reload this page.
Uh oh!
There was an error while loading. Please reload this page.
-
Is there any way to filter specific files without touching the other files/directories? My goal is to convert all "*.java" files in a project to UTF-8. If I do: [the command is missing from this copy of the page]
the Java files are converted perfectly, but I lose all the other files. Or did I miss something?
Beta Was this translation helpful? Give feedback.
All reactions