extracted function

This commit is contained in:
Karma Riuk
2025-03-14 15:03:32 +01:00
parent 4544922165
commit 8f854cfa09
2 changed files with 53 additions and 53 deletions

View File

@ -1,11 +1,10 @@
import pandas as pd
import argparse, os, sys, docker
import argparse, os, docker
from tqdm import tqdm
import shutil
from typing import Optional
from datetime import datetime
from handlers import GradleHandler, MavenHandler, BuildHandler
from handlers import get_build_handler
from utils import clone
tqdm.pandas()
@ -18,55 +17,6 @@ EXCLUSION_LIST = [
"Starcloud-Cloud/starcloud-llmops", # requires authentication
]
def get_build_handler(root: str, repo: str, updates: dict, verbose: bool = False) -> Optional[BuildHandler]:
    """
    Locate the build file of a repository and return the matching build handler.

    The repository root is searched first for a `pom.xml` or `build.gradle`
    file. If none is found there, the immediate subdirectories of the root
    are searched (one level deep only). The first match encountered wins;
    the order is whatever `os.scandir` yields.

    Args:
        root (str): The root directory in which the repository is located.
        repo (str): The name of the repository (joined onto `root`).
        updates (dict): Mutable record that is filled in as a side effect:
            `build_system` ("gradle" or "maven") and `depth_of_build_file`
            (0 for the repository root, 1 for a first-level subdirectory) on
            success, or `error_msg` on failure. It is also handed to the
            handler that is created.
        verbose (bool): If true, print which build file was found and where.

    Returns:
        Optional[BuildHandler]: A `GradleHandler` or `MavenHandler` built from
        the repository path and the build file path relative to it, or `None`
        if the repository path is not a directory or no build file was found.
    """
    path = os.path.join(root, repo)

    # Check if the given path is a directory
    if not os.path.isdir(path):
        error_msg = f"The path {path} is not a valid directory."
        print(error_msg, file=sys.stderr)
        updates["error_msg"] = error_msg
        return None

    to_keep = ["pom.xml", "build.gradle"]

    # Look for a build file directly in the repository root.
    # Using scandir as a context manager releases the directory handle
    # even when we return from inside the loop.
    with os.scandir(path) as entries:
        for entry in entries:
            if entry.is_file() and entry.name in to_keep:
                if verbose:
                    print(f"Found {entry.name} in {repo} root, so keeping it and returning")
                updates["depth_of_build_file"] = 0
                if entry.name == "build.gradle":
                    updates["build_system"] = "gradle"
                    return GradleHandler(path, entry.name, updates)
                else:
                    updates["build_system"] = "maven"
                    return MavenHandler(path, entry.name, updates)

    # List files in the immediate subdirectories
    with os.scandir(path) as entries:
        for entry in entries:
            if not entry.is_dir():
                continue
            with os.scandir(entry.path) as sub_entries:
                for sub_entry in sub_entries:
                    if sub_entry.is_file() and sub_entry.name in to_keep:
                        if verbose:
                            print(f"Found {sub_entry.name} in {repo} first level, so keeping it and returning")
                        updates["depth_of_build_file"] = 1
                        # Path of the build file relative to the repository root
                        build_file = os.path.join(entry.name, sub_entry.name)
                        # The build system is decided by the file that was found
                        # (sub_entry), not by the directory that contains it (entry).
                        if sub_entry.name == "build.gradle":
                            updates["build_system"] = "gradle"
                            return GradleHandler(path, build_file, updates)
                        else:
                            updates["build_system"] = "maven"
                            return MavenHandler(path, build_file, updates)

    updates["error_msg"] = "No build file found"
    return None
def remove_dir(dir: str) -> None:
"""
Removes a directory and all its contents. Removes parent directorie if it is empty after removing child (dir).

View File

@ -1,6 +1,7 @@
from abc import ABC, abstractmethod
import os, re, docker, signal
import os, re, docker, signal, sys
from bs4 import BeautifulSoup
from typing import Optional
# User and group IDs of the process that imports this module, read once at import time.
USER_ID = os.getuid() # for container user
GROUP_ID = os.getgid() # presumably the group counterpart for the container user -- confirm at the usage site
@ -297,3 +298,52 @@ def clean_output(output: bytes) -> str:
cleaned_lines = merge_unapproved_licences(cleaned_lines)
return "\n".join(cleaned_lines)
def get_build_handler(root: str, repo: str, updates: dict, verbose: bool = False) -> Optional[BuildHandler]:
    """
    Locate the build file of a repository and return the matching build handler.

    The repository root is searched first for a `pom.xml` or `build.gradle`
    file. If none is found there, the immediate subdirectories of the root
    are searched (one level deep only). The first match encountered wins;
    the order is whatever `os.scandir` yields.

    Args:
        root (str): The root directory in which the repository is located.
        repo (str): The name of the repository (joined onto `root`).
        updates (dict): Mutable record that is filled in as a side effect:
            `build_system` ("gradle" or "maven") and `depth_of_build_file`
            (0 for the repository root, 1 for a first-level subdirectory) on
            success, or `error_msg` on failure. It is also handed to the
            handler that is created.
        verbose (bool): If true, print which build file was found and where.

    Returns:
        Optional[BuildHandler]: A `GradleHandler` or `MavenHandler` built from
        the repository path and the build file path relative to it, or `None`
        if the repository path is not a directory or no build file was found.
    """
    path = os.path.join(root, repo)

    # Check if the given path is a directory
    if not os.path.isdir(path):
        error_msg = f"The path {path} is not a valid directory."
        print(error_msg, file=sys.stderr)
        updates["error_msg"] = error_msg
        return None

    to_keep = ["pom.xml", "build.gradle"]

    # Look for a build file directly in the repository root.
    # Using scandir as a context manager releases the directory handle
    # even when we return from inside the loop.
    with os.scandir(path) as entries:
        for entry in entries:
            if entry.is_file() and entry.name in to_keep:
                if verbose:
                    print(f"Found {entry.name} in {repo} root, so keeping it and returning")
                updates["depth_of_build_file"] = 0
                if entry.name == "build.gradle":
                    updates["build_system"] = "gradle"
                    return GradleHandler(path, entry.name, updates)
                else:
                    updates["build_system"] = "maven"
                    return MavenHandler(path, entry.name, updates)

    # List files in the immediate subdirectories
    with os.scandir(path) as entries:
        for entry in entries:
            if not entry.is_dir():
                continue
            with os.scandir(entry.path) as sub_entries:
                for sub_entry in sub_entries:
                    if sub_entry.is_file() and sub_entry.name in to_keep:
                        if verbose:
                            print(f"Found {sub_entry.name} in {repo} first level, so keeping it and returning")
                        updates["depth_of_build_file"] = 1
                        # Path of the build file relative to the repository root
                        build_file = os.path.join(entry.name, sub_entry.name)
                        # The build system is decided by the file that was found
                        # (sub_entry), not by the directory that contains it (entry).
                        if sub_entry.name == "build.gradle":
                            updates["build_system"] = "gradle"
                            return GradleHandler(path, build_file, updates)
                        else:
                            updates["build_system"] = "maven"
                            return MavenHandler(path, build_file, updates)

    updates["error_msg"] = "No build file found"
    return None