You are not logged in.

#1 2022-09-26 12:34:20

linuxscoop
Member
Registered: 2022-08-09
Posts: 26

[SOLV]How to get conflicting files with same relative paths in python?

I've made a virtual file system from a few folders and want to check if there are any conflicting files.

/mnt/T:/mnt/V:/mnt/W:/mnt/X:/mnt/Y:/mnt/Z:/srv/nfs/lenovo/hdd1  /mnt/storage      fuse.mergerfs  allow_other,use_ino,cache.files=off,dropcacheonclose=true,x-gvfs-show   0       0

So I want to provide two or more folders and get files with the same path relative to their folders.

Usage: python file_conflicts.py <dir>... > conflicts.txt

How can I find the conflicts?

This is what I've done so far. The get_files and remove_duplicates functions aren't working as I expected.

import os
import shutil
import sys

from collections import Counter
from pathlib import Path
from typing import List


def main():
    """Print relative paths that exist in more than one of the given folders.

    The folders are read from the command line (``sys.argv[1:]``) and the
    conflicting relative paths are written to stdout, one per line.

    Raises:
        SystemExit: with status 1 when fewer than two folders are given.
    """
    folders = sys.argv[1:]

    if len(folders) < 2:
        # stdout is meant to be redirected into the conflicts list, so the
        # diagnostic goes to stderr.  sys.exit is used because the bare
        # ``exit`` helper only exists when the ``site`` module is loaded.
        print("Please provide at least 2 folders", file=sys.stderr)
        sys.exit(1)

    files = get_files(folders)
    conflicting_files = find_conflicting_files(files)
    conflicting_files = remove_duplicates(conflicting_files)
    print_conflicting_files(conflicting_files)


def get_files(folders):
    """Collect the files below each folder as paths relative to that folder.

    Args:
        folders: Iterable of directory paths to scan recursively.

    Returns:
        A list of relative path strings, grouped by folder in the order the
        folders were given and sorted within each folder.  Directories are
        skipped, so only files (including symlinks that resolve to files)
        are listed.
    """
    files = []
    for folder in folders:
        # rglob("*") also yields directories; keeping them would report
        # every shared sub-directory as a conflict.  Sorting makes the
        # result independent of the file system's traversal order.
        files.extend(
            sorted(
                os.path.relpath(path, folder)
                for path in Path(folder).rglob("*")
                if path.is_file()
            )
        )
    return files


def test_get_files():
    """Check that get_files lists only files, relative to each folder."""
    import tempfile

    # A throw-away directory avoids creating -- and afterwards deleting -- a
    # "test" directory that may already exist in the working directory.
    with tempfile.TemporaryDirectory() as tmp:
        folder1 = os.path.join(tmp, "folder1")
        folder2 = os.path.join(tmp, "folder2")
        os.makedirs(os.path.join(folder1, "a"))
        os.makedirs(os.path.join(folder2, "b"))
        open(os.path.join(folder1, "a", "file"), "w").close()
        open(os.path.join(folder2, "b", "file"), "w").close()

        expected = [os.path.join("a", "file"), os.path.join("b", "file")]
        # Sorted so the check does not depend on directory traversal order.
        assert sorted(get_files([folder1, folder2])) == expected


def find_conflicting_files(files) -> List:
    """Return every entry of *files* that occurs more than once.

    Each such entry appears once in the result, in the order in which it
    was first seen in *files*.
    """
    occurrences = Counter(files)
    repeated = []
    for name, count in occurrences.items():
        if count > 1:
            repeated.append(name)
    return repeated


def test_find_conflicting_files():
    """Check that paths occurring more than once are each reported once."""
    # find_conflicting_files counts the hashable entries of one flat list
    # (the shape get_files returns), so the per-folder listings are
    # concatenated instead of being passed as nested, unhashable lists.
    files = [
        "a", "b", "c",
        "a", "b", "d",
        "a", "b", "e",
        "a", "b", "f",
    ]

    assert find_conflicting_files(files) == ["a", "b"]


def remove_duplicates(l: List) -> List:
    """Return the items of *l* without repeats, keeping first-seen order.

    Args:
        l: List of hashable items.

    Returns:
        A new list in which every distinct item of *l* appears exactly once,
        at the position of its first occurrence.
    """
    # dict keys are unique and preserve insertion order, whereas iterating a
    # set gives an arbitrary order that made the result unpredictable.
    return list(dict.fromkeys(l))


def test_remove_duplicates():
    """Check that each repeated entry is reduced to a single occurrence."""
    with_repeats = ["a", "a", "b", "b", "c", "c"]
    expected = ["a", "b", "c"]
    assert remove_duplicates(with_repeats) == expected


def print_conflicting_files(files):
    """Write each entry of *files* to stdout on a line of its own."""
    for entry in files:
        print(entry)


# Run the command-line entry point only when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    main()

Last edited by linuxscoop (2022-09-27 14:32:36)

Offline

#2 2022-09-26 14:29:45

schard
Member
From: Hannover
Registered: 2016-05-06
Posts: 1,933
Website

Re: [SOLV]How to get conflicting files with same relative paths in python?

#! /usr/bin/env python3
"""Find conflicting files."""

from itertools import combinations
from os import walk
from pathlib import Path
from typing import Iterable, Iterator


def conflicting_files(roots: Iterable[Path]) -> Iterator[tuple[Path, Path]]:
    """Yield pairs of paths that share a relative path under two roots.

    Every pair of roots is compared; for each relative file path present
    under both, the two full paths are yielded as a (left, right) tuple.
    """

    listing = {}
    for root in roots:
        listing[root] = set(realtive_files(root))

    for left, right in combinations(listing.items(), 2):
        l_root, l_files = left
        r_root, r_files = right
        for shared in l_files & r_files:
            yield l_root / shared, r_root / shared


def realtive_files(root: Path) -> Iterator[Path]:
    """Yield each file found below *root* as a path relative to *root*."""

    for current_dir, _subdirs, filenames in walk(root):
        parent = Path(current_dir)
        yield from ((parent / name).relative_to(root) for name in filenames)


def main():
    """Report conflicting files among the directories in the CWD."""

    subdirs = (entry for entry in Path.cwd().iterdir() if entry.is_dir())

    for lhs, rhs in conflicting_files(subdirs):
        print(f'Files "{lhs}" and "{rhs}" are in conflict.')


# Only run the scan when this file is executed directly, not when imported.
if __name__ == '__main__':
    main()

Alternative:

#! /usr/bin/env python3
"""Find conflicting files."""

from collections import defaultdict
from os import walk
from pathlib import Path
from typing import Iterable, Iterator


def conflicting_files(roots: Iterable[Path]) -> Iterator[tuple[Path, set[Path]]]:
    """Yield each relative file path found under more than one root.

    Every yielded item is a (relative path, roots) tuple, where roots is
    the set of root directories that contain that relative path.
    """

    owners_by_file = defaultdict(set)

    for root in roots:
        for relative in realtive_files(root):
            owners_by_file[relative].add(root)

    yield from (
        (relative, owners)
        for relative, owners in owners_by_file.items()
        if len(owners) > 1
    )


def realtive_files(root: Path) -> Iterator[Path]:
    """Walk *root* and yield every file in it as a path relative to *root*."""

    for directory, _subdirs, filenames in walk(root):
        parent = Path(directory)
        for filename in filenames:
            full_path = parent / filename
            yield full_path.relative_to(root)


def main():
    """List each conflicting file and the CWD directories that contain it."""

    subdirs = (entry for entry in Path.cwd().iterdir() if entry.is_dir())

    for relative, owners in conflicting_files(subdirs):
        print('Conflicting file:', relative)
        for owner in owners:
            print('  * in:', owner)


# Only run the scan when this file is executed directly, not when imported.
if __name__ == '__main__':
    main()

Last edited by schard (2022-09-26 14:40:23)

Offline

#3 2022-09-26 15:10:25

Trilby
Inspector Parrot
Registered: 2011-11-29
Posts: 29,447
Website

Re: [SOLV]How to get conflicting files with same relative paths in python?

Does this need to be in Python?  If you just want to find duplicate file names/paths under multiple directories:

# -type f restricts the listing to regular files; without it the start points
# (whose %P is empty) and every shared sub-directory are reported as duplicates.
find dir1 dir2 -type f -printf '%P\n' | sort | uniq -d

Last edited by Trilby (2022-09-26 19:40:30)


"UNIX is simple and coherent..." - Dennis Ritchie, "GNU's Not UNIX" -  Richard Stallman

Offline

Board footer

Powered by FluxBB