Skip to content

Commit

Permalink
Merge pull request #170 from salesforce/output-search-information
Browse files (browse the repository at this point in the history)
Output search result / processed repo numbers
  • Loading branch information
justinharringa authored Aug 6, 2020
2 parents 584c909 + 2a3170a commit f7aa730
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ public Multimap<String, GitHubContentToProcess> forkRepositoriesFoundAndGetPathT
Multimap<String, GitHubContentToProcess> pathToDockerfilesInParentRepo = HashMultimap.create();
GHRepository parent;
String parentRepoName;
int totalContentsFound = 0;
int contentsShouldFork = 0;
for (GHContent ghContent : contentsWithImage) {
/* Kohsuke's GitHub API library, when retrieving the forked repository, looks at the name of the parent to
* retrieve. The issue with that is: GitHub, when forking two or more repositories with the same name,
Expand All @@ -48,13 +50,15 @@ public Multimap<String, GitHubContentToProcess> forkRepositoriesFoundAndGetPathT
* repositories that were automatically fixed by GitHub. Instead, we save the names of the parent repos
* in the map above, find the list of repositories under the authorized user, and iterate through that list.
*/
totalContentsFound++;
parent = ghContent.getOwner();
parentRepoName = parent.getFullName();
// Refresh the repo to ensure that the object has full details
try {
parent = dockerfileGitHubUtil.getRepo(parentRepoName);
ShouldForkResult shouldForkResult = forkableRepoValidator.shouldFork(parent, ghContent, gitForkBranch);
if (shouldForkResult.isForkable()) {
contentsShouldFork++;
// fork the parent if not already forked
ensureForkedAndAddToListForProcessing(pathToDockerfilesInParentRepo, parent, parentRepoName, ghContent);
} else {
Expand All @@ -65,6 +69,8 @@ public Multimap<String, GitHubContentToProcess> forkRepositoriesFoundAndGetPathT
}
}

log.info("Out of {} content search results processed, {} were deemed eligible for forking to yield {} repositories to fork.",
totalContentsFound, contentsShouldFork, pathToDockerfilesInParentRepo.keys().size());
log.info("Path to Dockerfiles in repos: {}", pathToDockerfilesInParentRepo);

return pathToDockerfilesInParentRepo;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ public GHRepository getRepo(String repoName) throws IOException {
public PagedSearchIterable<GHContent> findFilesWithImage(String query, String org) throws IOException {
GHContentSearchBuilder search = gitHubUtil.startSearch();
// Filename search appears to yield better / more results than language:Dockerfile
// Root cause: linguist doesn't currently deal with prefixes of files:
// https://github.com/github/linguist/issues/4566
search.filename("Dockerfile");
if (org != null) {
search.user(org);
Expand All @@ -82,7 +84,10 @@ public PagedSearchIterable<GHContent> findFilesWithImage(String query, String or
log.debug("Searching for {}", query);
PagedSearchIterable<GHContent> files = search.list();
int totalCount = files.getTotalCount();
log.debug("Number of files found for {}:{}", query, totalCount);
if (totalCount > 1000) {
log.warn("Number of search results is above 1000! The GitHub Search API will only return around 1000 results - https://developer.github.com/v3/search/#about-the-search-api");
}
log.info("Number of files found for {}:{}", query, totalCount);
return files;
}

Expand Down

0 comments on commit f7aa730

Please sign in to comment.