You are not logged in.
I've made a virtual file system from a few folders and want to check if there are any conflicting files.
/mnt/T:/mnt/V:/mnt/W:/mnt/X:/mnt/Y:/mnt/Z:/srv/nfs/lenovo/hdd1 /mnt/storage fuse.mergerfs allow_other,use_ino,cache.files=off,dropcacheonclose=true,x-gvfs-show 0 0
So I want to provide two or more folders and get the files that exist under the same relative path in more than one of them.
Usage: python file_conflicts.py <dir>... > conflicts.txt
How can I find the conflicts?
This is what I've done so far. The get_files and remove_duplicates functions aren't working as I expected.
import os
import shutil
import sys
from collections import Counter
from pathlib import Path
from typing import List
def main():
    """Report files that exist under the same relative path in several folders.

    The folders to compare are read from ``sys.argv[1:]``.  Each conflicting
    relative path is printed on its own line to standard output.  Exits with
    status 1 when fewer than two folders are given.
    """
    folders = sys.argv[1:]
    if len(folders) < 2:
        # Diagnostics go to stderr so that redirecting stdout to a file
        # (``> conflicts.txt``) never mixes them into the results.
        print("Please provide at least 2 folders", file=sys.stderr)
        # ``sys.exit`` rather than ``exit``: the latter is a helper injected
        # by the ``site`` module and is not available under ``python -S``.
        sys.exit(1)
    files = get_files(folders)
    conflicting_files = find_conflicting_files(files)
    conflicting_files = remove_duplicates(conflicting_files)
    print_conflicting_files(conflicting_files)
def get_files(folders):
    """Return the relative paths of all files found below each folder.

    Args:
        folders: Iterable of directory paths to scan recursively.

    Returns:
        A flat list of path strings, each relative to the folder it was
        found in.  Entries are grouped by folder (in the order given) and
        sorted within each folder.  A path that is present in several
        folders appears once per folder.
    """
    files = []
    for folder in folders:
        root = Path(folder)
        files.extend(
            sorted(
                str(path.relative_to(root))
                for path in root.rglob("*")
                # Directories are skipped: the same sub-directory existing in
                # several folders is not a conflict, only files are.
                if path.is_file()
            )
        )
    return files
def test_get_files():
    """Check that get_files returns only files, relative to their folder."""
    # Local import: a throw-away directory replaces the fixed ``test`` folder
    # in the current directory, which ``shutil.rmtree`` would otherwise wipe
    # even if it had existed before the test ran.
    import tempfile

    with tempfile.TemporaryDirectory() as base:
        folders = [
            os.path.join(base, "folder1"),
            os.path.join(base, "folder2"),
        ]
        for folder, sub in zip(folders, ("a", "b")):
            os.makedirs(os.path.join(folder, sub))
            with open(os.path.join(folder, sub, "file"), "w"):
                pass  # an empty file is enough
        expected = [os.path.join("a", "file"), os.path.join("b", "file")]
        assert get_files(folders) == expected
def find_conflicting_files(files) -> List:
    """Return each entry of *files* that occurs more than once.

    Every repeated entry is reported a single time, in the order in which
    it was first encountered.
    """
    occurrences = Counter(files)
    repeated = []
    for name, count in occurrences.items():
        if count > 1:
            repeated.append(name)
    return repeated
def test_find_conflicting_files():
    """Check that names listed more than once are reported exactly once."""
    # The input is one flat list, the shape get_files() builds with
    # ``files.extend``.  Nested lists are unhashable and would make
    # ``Counter`` raise TypeError.
    files = [
        "a", "b", "c",
        "a", "b", "d",
        "a", "b", "e",
        "a", "b", "f",
    ]
    assert find_conflicting_files(files) == ["a", "b"]
def remove_duplicates(l: List) -> List:
    """Return the items of *l* without repeats, in first-seen order.

    Args:
        l: List of hashable items.

    Returns:
        A new list in which every distinct item of *l* appears once, at the
        position of its first occurrence.
    """
    # dict keys are unique and keep insertion order, unlike ``set`` whose
    # iteration order is arbitrary.
    return list(dict.fromkeys(l))
def test_remove_duplicates():
    """Check that repeated entries are collapsed into a single one."""
    expected = ["a", "b", "c"]
    assert remove_duplicates(["a", "a", "b", "b", "c", "c"]) == expected
def print_conflicting_files(files):
    """Write every entry of *files* to standard output, one per line."""
    for conflict in files:
        print(conflict)
# Run the conflict check only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Last edited by linuxscoop (2022-09-27 14:32:36)
Offline
#! /usr/bin/env python3
"""Find conflicting files."""
from itertools import combinations
from os import walk
from pathlib import Path
from typing import Iterable, Iterator
def conflicting_files(roots: Iterable[Path]) -> Iterator[tuple[Path, Path]]:
    """Yield pairs of files that share a relative path under two roots.

    Every pair of roots is compared once.  For each relative path found
    under both roots of a pair, the two full paths
    ``(left_root / path, right_root / path)`` are yielded.
    """
    files = {root: set(realtive_files(root)) for root in roots}
    for (l_root, l_files), (r_root, r_files) in combinations(files.items(), 2):
        # Set iteration order is arbitrary; sorting the common paths makes
        # the report reproducible from one run to the next.
        for file in sorted(l_files & r_files):
            yield l_root / file, r_root / file
def realtive_files(root: Path) -> Iterator[Path]:
    """Yield the path of every file below *root*, relative to *root*."""
    for current_dir, _subdirs, names in walk(root):
        for name in names:
            location = Path(current_dir) / name
            yield location.relative_to(root)
def main():
    """Print each pair of conflicting files found in sub-folders of the CWD."""
    folders = (entry for entry in Path.cwd().iterdir() if entry.is_dir())
    for lhs, rhs in conflicting_files(folders):
        print(f'Files "{lhs}" and "{rhs}" are in conflict.')
# Run the conflict check only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Alternative:
#! /usr/bin/env python3
"""Find conflicting files."""
from collections import defaultdict
from os import walk
from pathlib import Path
from typing import Iterable, Iterator
def conflicting_files(roots: Iterable[Path]) -> Iterator[tuple[Path, set[Path]]]:
    """Yield each relative file path found under more than one root.

    Every item is a ``(relative_path, roots)`` tuple where ``roots`` is the
    set of root folders that contain a file at that relative path.
    """
    owners = defaultdict(set)
    for root in roots:
        for relative in realtive_files(root):
            owners[relative].add(root)
    yield from (
        (relative, holders)
        for relative, holders in owners.items()
        if len(holders) > 1
    )
def realtive_files(root: Path) -> Iterator[Path]:
    """Walk *root* and yield every file's path relative to *root*."""
    for base_dir, _, files in walk(root):
        yield from (Path(base_dir, node).relative_to(root) for node in files)
def main():
    """List each conflicting file with the CWD sub-folders that contain it."""
    candidates = (node for node in Path.cwd().iterdir() if node.is_dir())
    for file, roots in conflicting_files(candidates):
        print('Conflicting file:', file)
        for root in roots:
            print(' * in:', root)
# Run the conflict check only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Last edited by schard (2022-09-26 14:40:23)
Offline
Does this need to be in Python? If you just want to find duplicate file names/paths under multiple directories:
# Print each relative path that exists as a non-directory under more than one
# of the given directories. Directories are excluded so that shared folders
# (and the empty %P of the starting points) are not reported as duplicates.
find dir1 dir2 ! -type d -printf '%P\n' | sort | uniq -d
Last edited by Trilby (2022-09-26 19:40:30)
"UNIX is simple and coherent..." - Dennis Ritchie, "GNU's Not UNIX" - Richard Stallman
Offline