Skip to content

Commit

Permalink
use lines-changed-only when getting file changes
Browse files Browse the repository at this point in the history
move helper function to gh-specific API
  • Loading branch information
2bndy5 committed Jan 3, 2025
1 parent 1748f3f commit 3608480
Show file tree
Hide file tree
Showing 10 changed files with 208 additions and 145 deletions.
9 changes: 8 additions & 1 deletion cpp-linter/src/cli/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -407,13 +407,20 @@ pub fn convert_extra_arg_val(args: &ArgMatches) -> Vec<String> {
mod test {
use clap::ArgMatches;

use super::{convert_extra_arg_val, get_arg_parser};
use super::{convert_extra_arg_val, get_arg_parser, Cli};

/// Build the CLI argument parser and run it over `input`, returning the matches.
fn parser_args(input: Vec<&str>) -> ArgMatches {
    get_arg_parser().get_matches_from(input)
}

/// A blank entry in the `-e` value must not end up in `Cli::extensions`.
#[test]
fn ignore_blank_extensions() {
    // "c,,h" has nothing between the two commas.
    let matches = parser_args(vec!["cpp-linter", "-e", "c,,h"]);
    let parsed = Cli::from(&matches);
    assert!(parsed.extensions.iter().all(|ext| !ext.is_empty()));
}

#[test]
fn extra_arg_0() {
let args = parser_args(vec!["cpp-linter"]);
Expand Down
17 changes: 16 additions & 1 deletion cpp-linter/src/cli/structs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,14 @@ impl LinesChangedOnly {
_ => LinesChangedOnly::Off,
}
}

pub fn is_change_valid(&self, added_lines: bool, diff_chunks: bool) -> bool {
match self {
LinesChangedOnly::Off => true,
LinesChangedOnly::Diff => diff_chunks,
LinesChangedOnly::On => added_lines,
}
}
}

impl Display for LinesChangedOnly {
Expand Down Expand Up @@ -91,7 +99,14 @@ impl From<&ArgMatches> for Cli {
let extensions = args
.get_many::<String>("extensions")
.unwrap()
.map(|s| s.to_string())
.filter_map(|s| {
if s.is_empty() {
// filter out blank extensions here
None
} else {
Some(s.to_string())
}
})
.collect::<Vec<_>>();

Self {
Expand Down
24 changes: 9 additions & 15 deletions cpp-linter/src/common_fs/file_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,23 +129,17 @@ impl FileFilter {
/// - Is `entry` specified in the list of explicitly `not_ignored` paths? (supersedes
/// specified `ignored` paths)
pub fn is_source_or_ignored(&self, entry: &Path) -> bool {
let extension = entry.extension();
if extension.is_none() {
let extension = entry
.extension()
.unwrap_or_default() // allow for matching files with no extension
.to_string_lossy()
.to_string();
if !self.extensions.contains(&extension) {
return false;
}
let mut is_ignored = true;
for ext in &self.extensions {
if ext == &extension.unwrap().to_os_string().into_string().unwrap() {
is_ignored = false;
break;
}
}
if !is_ignored {
let is_in_ignored = self.is_file_in_list(entry, true);
let is_in_not_ignored = self.is_file_in_list(entry, false);
if is_in_not_ignored || !is_in_ignored {
return true;
}
let is_in_not_ignored = self.is_file_in_list(entry, false);
if is_in_not_ignored || !self.is_file_in_list(entry, true) {
return true;
}
false
}
Expand Down
60 changes: 49 additions & 11 deletions cpp-linter/src/git.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@ use anyhow::{Context, Result};
use git2::{Diff, Error, Patch, Repository};

// project specific modules/crates
use crate::common_fs::{FileFilter, FileObj};
use crate::{
cli::LinesChangedOnly,
common_fs::{FileFilter, FileObj},
};

/// This (re-)initializes the repository located in the specified `path`.
///
Expand Down Expand Up @@ -100,7 +103,11 @@ fn parse_patch(patch: &Patch) -> (Vec<u32>, Vec<RangeInclusive<u32>>) {
///
/// The specified list of `extensions`, `ignored` and `not_ignored` files are used as
/// filters to expedite the process and only focus on the data cpp_linter can use.
pub fn parse_diff(diff: &git2::Diff, file_filter: &FileFilter) -> Vec<FileObj> {
pub fn parse_diff(
diff: &git2::Diff,
file_filter: &FileFilter,
lines_changed_only: &LinesChangedOnly,
) -> Vec<FileObj> {
let mut files: Vec<FileObj> = Vec::new();
for file_idx in 0..diff.deltas().count() {
let diff_delta = diff.get_delta(file_idx).unwrap();
Expand All @@ -112,7 +119,10 @@ pub fn parse_diff(diff: &git2::Diff, file_filter: &FileFilter) -> Vec<FileObj> {
{
let (added_lines, diff_chunks) =
parse_patch(&Patch::from_diff(diff, file_idx).unwrap().unwrap());
files.push(FileObj::from(file_path, added_lines, diff_chunks));
if lines_changed_only.is_change_valid(!added_lines.is_empty(), !diff_chunks.is_empty())
{
files.push(FileObj::from(file_path, added_lines, diff_chunks));
}
}
}
files
Expand All @@ -125,12 +135,20 @@ pub fn parse_diff(diff: &git2::Diff, file_filter: &FileFilter) -> Vec<FileObj> {
/// log warning and error are output when this occurs. Please report this instance for
/// troubleshooting/diagnosis as this likely means the diff is malformed or there is a
/// bug in libgit2 source.
pub fn parse_diff_from_buf(buff: &[u8], file_filter: &FileFilter) -> Vec<FileObj> {
pub fn parse_diff_from_buf(
buff: &[u8],
file_filter: &FileFilter,
lines_changed_only: &LinesChangedOnly,
) -> Vec<FileObj> {
if let Ok(diff_obj) = &Diff::from_buffer(buff) {
parse_diff(diff_obj, file_filter)
parse_diff(diff_obj, file_filter, lines_changed_only)
} else {
log::warn!("libgit2 failed to parse the diff");
brute_force_parse_diff::parse_diff(&String::from_utf8_lossy(buff), file_filter)
brute_force_parse_diff::parse_diff(
&String::from_utf8_lossy(buff),
file_filter,
lines_changed_only,
)
}
}

Expand All @@ -146,7 +164,10 @@ mod brute_force_parse_diff {
use regex::Regex;
use std::{ops::RangeInclusive, path::PathBuf};

use crate::common_fs::{FileFilter, FileObj};
use crate::{
cli::LinesChangedOnly,
common_fs::{FileFilter, FileObj},
};

fn get_filename_from_front_matter(front_matter: &str) -> Option<&str> {
let diff_file_name = Regex::new(r"(?m)^\+\+\+\sb?/(.*)$").unwrap();
Expand Down Expand Up @@ -209,7 +230,11 @@ mod brute_force_parse_diff {
(additions, diff_chunks)
}

pub fn parse_diff(diff: &str, file_filter: &FileFilter) -> Vec<FileObj> {
pub fn parse_diff(
diff: &str,
file_filter: &FileFilter,
lines_changed_only: &LinesChangedOnly,
) -> Vec<FileObj> {
log::error!("Using brute force diff parsing!");
let mut results = Vec::new();
let diff_file_delimiter = Regex::new(r"(?m)^diff --git a/.*$").unwrap();
Expand All @@ -230,7 +255,11 @@ mod brute_force_parse_diff {
let file_path = PathBuf::from(file_name);
if file_filter.is_source_or_ignored(&file_path) {
let (added_lines, diff_chunks) = parse_patch(&file_diff[hunk_start..]);
results.push(FileObj::from(file_path, added_lines, diff_chunks));
if lines_changed_only
.is_change_valid(!added_lines.is_empty(), !diff_chunks.is_empty())
{
results.push(FileObj::from(file_path, added_lines, diff_chunks));
}
}
}
// } else {
Expand All @@ -247,6 +276,7 @@ mod brute_force_parse_diff {

use super::parse_diff;
use crate::{
cli::LinesChangedOnly,
common_fs::{FileFilter, FileObj},
git::parse_diff_from_buf,
};
Expand Down Expand Up @@ -274,6 +304,7 @@ rename to /tests/demo/some source.c
let files = parse_diff_from_buf(
diff_buf,
&FileFilter::new(&["target".to_string()], vec!["c".to_string()]),
&LinesChangedOnly::Off,
);
assert!(!files.is_empty());
assert!(files
Expand All @@ -289,6 +320,7 @@ rename to /tests/demo/some source.c
let files = parse_diff_from_buf(
diff_buf,
&FileFilter::new(&["target".to_string()], vec!["c".to_string()]),
&LinesChangedOnly::Off,
);
assert!(!files.is_empty());
}
Expand All @@ -301,8 +333,13 @@ rename to /tests/demo/some source.c
parse_diff_from_buf(
buf.as_bytes(),
&FileFilter::new(&ignore, extensions.to_owned()),
&LinesChangedOnly::Off,
),
parse_diff(
buf,
&FileFilter::new(&ignore, extensions.to_owned()),
&LinesChangedOnly::Off,
),
parse_diff(buf, &FileFilter::new(&ignore, extensions.to_owned())),
)
}

Expand Down Expand Up @@ -377,6 +414,7 @@ mod test {
use tempfile::{tempdir, TempDir};

use crate::{
cli::LinesChangedOnly,
common_fs::FileFilter,
rest_api::{github::GithubApiClient, RestApiClient},
};
Expand Down Expand Up @@ -406,7 +444,7 @@ mod test {
env::set_var("CI", "false"); // avoid use of REST API when testing in CI
rest_api_client
.unwrap()
.get_list_of_changed_files(&file_filter)
.get_list_of_changed_files(&file_filter, &LinesChangedOnly::Off)
.await
.unwrap()
}
Expand Down
76 changes: 16 additions & 60 deletions cpp-linter/src/rest_api/github/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,19 @@ use reqwest::{
header::{HeaderMap, HeaderValue, AUTHORIZATION},
Client, Method, Url,
};
use serde_json;

// project specific modules/crates
use super::{RestApiClient, RestApiRateLimitHeaders};
use crate::clang_tools::clang_format::tally_format_advice;
use crate::clang_tools::clang_tidy::tally_tidy_advice;
use crate::clang_tools::ClangVersions;
use crate::cli::{FeedbackInput, ThreadComments};
use crate::cli::{FeedbackInput, LinesChangedOnly, ThreadComments};
use crate::common_fs::{FileFilter, FileObj};
use crate::git::{get_diff, open_repo, parse_diff, parse_diff_from_buf};

// private submodules.
mod serde_structs;
mod specific_api;
use serde_structs::{GithubChangedFile, PushEventFiles};

/// A structure to work with Github REST API.
pub struct GithubApiClient {
Expand Down Expand Up @@ -121,7 +119,11 @@ impl RestApiClient for GithubApiClient {
Ok(headers)
}

async fn get_list_of_changed_files(&self, file_filter: &FileFilter) -> Result<Vec<FileObj>> {
async fn get_list_of_changed_files(
&self,
file_filter: &FileFilter,
lines_changed_only: &LinesChangedOnly,
) -> Result<Vec<FileObj>> {
if env::var("CI").is_ok_and(|val| val.as_str() == "true")
&& self.repo.is_some()
&& self.sha.is_some()
Expand Down Expand Up @@ -153,9 +155,13 @@ impl RestApiClient for GithubApiClient {
0,
)
.await
.with_context(|| "Failed to get list of changed files from GitHub server.")?;
.with_context(|| "Failed to get list of changed files.")?;
if response.status().is_success() {
Ok(parse_diff_from_buf(&response.bytes().await?, file_filter))
Ok(parse_diff_from_buf(
&response.bytes().await?,
file_filter,
lines_changed_only,
))
} else {
let endpoint = if is_pr {
Url::parse(format!("{}/files", url.as_str()).as_str())?
Expand All @@ -164,69 +170,19 @@ impl RestApiClient for GithubApiClient {
};
Self::log_response(response, "Failed to get full diff for event").await;
log::debug!("Trying paginated request to {}", endpoint.as_str());
self.get_changed_files_paginated(endpoint, file_filter)
self.get_changed_files_paginated(endpoint, file_filter, lines_changed_only)
.await
}
} else {
// get diff from libgit2 API
let repo = open_repo(".").with_context(|| {
"Please ensure the repository is checked out before running cpp-linter."
})?;
let list = parse_diff(&get_diff(&repo)?, file_filter);
let list = parse_diff(&get_diff(&repo)?, file_filter, lines_changed_only);
Ok(list)
}
}

/// Collect the changed files by walking the paginated REST endpoint `url`.
///
/// Requests start at `page=1` and continue for as long as `try_next_page` yields
/// another URL from the response headers. For any event other than `pull_request`
/// the JSON payload is read as a [`PushEventFiles`] and its `files` field is used;
/// for `pull_request` the payload is read directly as a list of
/// [`GithubChangedFile`]. Each entry that has a `patch` is wrapped in a minimal
/// single-file diff header and passed to [`parse_diff_from_buf`] with `file_filter`;
/// the first resulting [`FileObj`] (if any) is kept. Entries without a `patch` are
/// skipped.
///
/// # Errors
///
/// Returns an error if a request cannot be built or sent, if a response body cannot
/// be read, or if the body cannot be deserialized.
async fn get_changed_files_paginated(
    &self,
    url: Url,
    file_filter: &FileFilter,
) -> Result<Vec<FileObj>> {
    let mut url = Some(Url::parse_with_params(url.as_str(), &[("page", "1")])?);
    let mut files = vec![];
    while let Some(ref endpoint) = url {
        let request =
            Self::make_api_request(&self.client, endpoint.as_str(), Method::GET, None, None)?;
        // Propagate a failed request instead of ignoring it: `url` only advances
        // after a successful response, so swallowing the error here would request
        // the same page forever.
        let response = Self::send_api_request(
            self.client.clone(),
            request,
            self.rate_limit_headers.clone(),
            0,
        )
        .await
        .with_context(|| "Failed to get paginated list of changed files.")?;
        // Read the pagination headers before the body is consumed.
        url = Self::try_next_page(response.headers());
        let body = response.text().await?;
        let files_list = if self.event_name != "pull_request" {
            let json_value: PushEventFiles = serde_json::from_str(&body).with_context(|| {
                "Failed to deserialize list of changed files from json response"
            })?;
            json_value.files
        } else {
            serde_json::from_str::<Vec<GithubChangedFile>>(&body).with_context(|| {
                "Failed to deserialize list of file changes from Pull Request event."
            })?
        };
        for file in files_list {
            if let Some(patch) = file.patch {
                // Rebuild a single-file diff so the regular diff parser can be reused.
                let diff = format!(
                    "diff --git a/{old} b/{new}\n--- a/{old}\n+++ b/{new}\n{patch}",
                    old = file.previous_filename.unwrap_or(file.filename.clone()),
                    new = file.filename,
                );
                if let Some(file_obj) =
                    parse_diff_from_buf(diff.as_bytes(), file_filter).first()
                {
                    files.push(file_obj.to_owned());
                }
            }
        }
    }
    Ok(files)
}

async fn post_feedback(
&self,
files: &[Arc<Mutex<FileObj>>],
Expand Down Expand Up @@ -319,7 +275,7 @@ mod test {
clang_tidy::{TidyAdvice, TidyNotification},
ClangVersions,
},
cli::FeedbackInput,
cli::{FeedbackInput, LinesChangedOnly},
common_fs::{FileFilter, FileObj},
logger,
rest_api::{RestApiClient, USER_OUTREACH},
Expand Down Expand Up @@ -478,7 +434,7 @@ mod test {
env::set_current_dir(tmp_dir.path()).unwrap();
let rest_client = GithubApiClient::new().unwrap();
let files = rest_client
.get_list_of_changed_files(&FileFilter::new(&[], vec![]))
.get_list_of_changed_files(&FileFilter::new(&[], vec![]), &LinesChangedOnly::Off)
.await;
assert!(files.is_err())
}
Expand Down
Loading

0 comments on commit 3608480

Please sign in to comment.