diff --git a/backend/controllers/repos_controller.go b/backend/controllers/repos_controller.go index f9cd225..2f7685a 100644 --- a/backend/controllers/repos_controller.go +++ b/backend/controllers/repos_controller.go @@ -44,17 +44,15 @@ func CreateRepo(w http.ResponseWriter, r *http.Request) { repo.LastUpdated = utils.GetCurrentTime() } - - filter := utils.ConstructFilters(r, types.Repo{}) - - // Get data from database - data, err := db.GetReposByFilters(filter) - if err != nil { - log.Println(err) - } - if len(data) > 0 { - http.Error(w, "Error - duplicate entry", http.StatusBadRequest); - return + // If the repo already exists, remove the old repo and insert the new one + oldRepo, err := db.GetRepoByName(repo.Name) + if err == nil { + // Delete old repo + _, err = db.DeleteRepoByName(oldRepo.Name) + if err != nil { + http.Error(w, "Error deleting old repo", http.StatusInternalServerError) + return + } } // Insert repo into database diff --git a/backend/db/repos_db.go b/backend/db/repos_db.go index a91f380..1d70343 100644 --- a/backend/db/repos_db.go +++ b/backend/db/repos_db.go @@ -5,6 +5,7 @@ import ( "log" "github.com/g00gol/frieren/backend/types" + "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/mongo/options" ) @@ -31,3 +32,33 @@ func GetReposByFilters(filter any) ([]types.Repo, error) { return data, err } + +func GetRepoByName(name string) (types.Repo, error) { + collection := GetCollection("repos") + + var data types.Repo + + filter := bson.D{{Key: "name", Value: name}} + err := collection.FindOne(context.TODO(), filter).Decode(&data) + + if err != nil { + log.Println("Error finding repo:", err) + return types.Repo{}, err + } + + log.Println("Found repo:", data) + return data, err +} + +func DeleteRepoByName(name string) (int64, error) { + collection := GetCollection("repos") + + filter := bson.D{{Key: "name", Value: name}} + result, err := collection.DeleteOne(context.TODO(), filter) + if err != nil { + log.Println("Error deleting repo:", err) + return 0, err + } + + return result.DeletedCount, err +} diff --git a/backend/types/repo.go b/backend/types/repo.go index ea2a702..ee6554f 100644 --- a/backend/types/repo.go +++ b/backend/types/repo.go @@ -3,17 +3,17 @@ package types import "time" type Repo struct { - Hash string `bson:"hash"` - Name string `bson:"name"` - Description string `bson:"description"` - RepoOrigin string `bson:"repo_origin"` - FernBranch string `bson:"fern_branch"` - Languages []string `bson:"languages"` - Technologies []string `bson:"technologies"` - RecommendedIssueLabels []string `bson:"recommended_issue_labels"` - RecommendedIssuesCount int `bson:"recommended_issues_count"` - Difficulty int `bson:"difficulty"` - LastUpdated time.Time `bson:"last_updated"` // MongoDB Datetime - DateCreated time.Time `bson:"date_created"` // MongoDB Datetime - Stars int `bson:"stars"` + Hash string `bson:"hash" json:"hash"` + Name string `bson:"name" json:"name"` + Description string `bson:"description" json:"description"` + RepoOrigin string `bson:"repo_origin" json:"repo_origin"` + FernBranch string `bson:"fern_branch" json:"fern_branch"` + Languages []string `bson:"languages" json:"languages"` + Technologies []string `bson:"technologies" json:"technologies"` + RecommendedIssueLabels []string `bson:"recommended_issue_labels" json:"recommended_issue_labels"` + RecommendedIssuesCount int `bson:"recommended_issues_count" json:"recommended_issues_count"` + Difficulty int `bson:"difficulty" json:"difficulty"` + LastUpdated time.Time `bson:"last_updated" json:"last_updated"` + DateCreated time.Time `bson:"date_created" json:"date_created"` + Stars int `bson:"stars" json:"stars"` } diff --git a/frieren-cli/frieren-cli.py b/frieren-cli/frieren-cli.py index 9147c7f..c5b3db3 100755 --- a/frieren-cli/frieren-cli.py +++ b/frieren-cli/frieren-cli.py @@ -32,7 +32,10 @@ fern = {"name": re.search(r"/([^/]*/[^/]*)$","https://github.com/g00gol/frieren").group(1), "technologies": technologies, "difficulty": difficulty, "description": desc, "recommended_issue_labels": recommended_issue_labels} - fern['repo_origin'] = origin + fern['repo_origin'] = origin + fern['fern_branch'] = repo.active_branch.name + + print(fern) # Make api call r = requests.post("http://127.0.0.1:8080/repos", json=fern) @@ -41,4 +44,5 @@ else: with open("open-source.fern", "w+") as f: del fern['repo_origin'] + del fern['fern_branch'] json.dump(fern, f) \ No newline at end of file diff --git a/open-source.fern b/open-source.fern old mode 100755 new mode 100644 index a3c3157..8551f12 --- a/open-source.fern +++ b/open-source.fern @@ -1 +1 @@ -{"name": "g00gol/frieren", "technologies": ["MongoDB", "Rust", "Go", "Python", "JavaScript"], "difficulty": 3, "description": "Open source project finder", "recommended_issue_labels": ["easy", "start-here", "Good First Issue"]} +{"name": "g00gol/frieren", "technologies": ["MongoDB"], "difficulty": 3, "description": "test", "recommended_issue_labels": ["good first issue"]} \ No newline at end of file diff --git a/server-update-tool/Cargo.lock b/server-update-tool/Cargo.lock index 62cda38..16d6d75 100644 --- a/server-update-tool/Cargo.lock +++ b/server-update-tool/Cargo.lock @@ -29,6 +29,15 @@ dependencies = [ "version_check", ] +[[package]] +name = "aho-corasick" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea5d730647d4fadd988536d06fecce94b7b4f2a7efdae548f1cf4b63205518ab" +dependencies = [ + "memchr", +] + [[package]] name = "android-tzdata" version = "0.1.1" @@ -328,6 +337,19 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "env_logger" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85cdab6a89accf66733ad5a1693a4dcced6aeff64602b634530dd73c1f3ee9f0" +dependencies = [ + "humantime", + "is-terminal", + "log", + "regex", + "termcolor", +] + [[package]] name = "equivalent" version = "1.0.1" @@ -403,6 +425,21 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" +[[package]] +name = "futures" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + [[package]] name = "futures-channel" version = "0.3.28" @@ -410,6 +447,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" dependencies = [ "futures-core", + "futures-sink", ] [[package]] @@ -464,9 +502,11 @@ version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" dependencies = [ + "futures-channel", "futures-core", "futures-io", "futures-macro", + "futures-sink", "futures-task", "memchr", "pin-project-lite", @@ -604,6 +644,12 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + [[package]] name = "hyper" version = "0.14.27" @@ -729,6 +775,17 @@ version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28b29a3cd74f0f4598934efe3aeba42bae0eb4680554128851ebbecb02af14e6" +[[package]] +name = "is-terminal" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" +dependencies = [ + "hermit-abi", + "rustix", + "windows-sys", +] + [[package]] name = "itoa" version = "1.0.9" @@ -900,6 +957,12 @@ dependencies = [ "webpki-roots", ] +[[package]] +name = "more-asserts" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fafa6961cabd9c63bcd77a45d7e3b7f3b552b70417831fb0f56db717e72407e" + [[package]] name = "native-tls" version = "0.2.11" @@ -1127,6 +1190,35 @@ dependencies = [ "bitflags 1.3.2", ] +[[package]] +name = "regex" +version = "1.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebee201405406dbf528b8b672104ae6d6d63e6d118cb10e4d51abbc7b58044ff" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59b23e92ee4318893fa3fe3e6fb365258efbfe6ac6ab30f090cdcbb7aa37efa9" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" + [[package]] name = "reqwest" version = "0.11.22" @@ -1425,8 +1517,12 @@ dependencies = [ "base64 0.21.4", "bson", "chrono", + "env_logger", + "futures", + "log", "md5", "mongodb", + "more-asserts", "reqwest", "serde", "serde_json", @@ -1597,6 +1693,15 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "termcolor" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6093bad37da69aab9d123a8091e4be0aa4a03e4d601ec641c327398315f62b64" +dependencies = [ + "winapi-util", +] + [[package]] name = "thiserror" version = "1.0.49" @@ -1997,6 +2102,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" +dependencies = [ + "winapi", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" diff --git a/server-update-tool/Cargo.toml b/server-update-tool/Cargo.toml index 4d28c9b..24e7813 100644 --- a/server-update-tool/Cargo.toml +++ b/server-update-tool/Cargo.toml @@ -15,4 +15,8 @@ chrono = "0.4" serde = "1" url = "2.4" reqwest = { version = "0.11", features = ["json"] } -serde_json = "1.0.107" \ No newline at end of file +serde_json = "1.0.107" +futures = "0.3" +log = "0.4" +env_logger = "0.10" +more-asserts = "0.3" \ No newline at end of file diff --git a/server-update-tool/src/db.rs b/server-update-tool/src/db.rs index 6e48840..1b9b12b 100644 --- a/server-update-tool/src/db.rs +++ b/server-update-tool/src/db.rs @@ -4,6 +4,7 @@ use chrono::{DateTime, Utc}; use std::env; use std::error::Error; use serde::{Serialize, Deserialize}; +use log::{debug}; #[derive(Clone, Debug, Serialize, Deserialize)] pub struct Repo { @@ -48,20 +49,24 @@ async fn get_repos_collection() -> Result, Box> { } pub async fn get_repos() -> Result, Box> { + debug!("Attempting to query DB"); let collection = get_repos_collection().await?; let cursor = collection.find(None, None).await?; + debug!("Successully queried DB"); return Ok(cursor); } -pub async fn update_repo(id: &ObjectId, new_repo: &Repo) -> Result<(), Box> { +pub async fn update_repo(new_repo: &Repo) -> Result<(), Box> { + debug!("Attempting to update DB"); let col = get_repos_collection().await?; - let update_result = col.find_one_and_replace( + col.find_one_and_replace( doc!{ - "_id": id + "_id": new_repo._id }, new_repo, FindOneAndReplaceOptions::builder().build() ).await?; + debug!("Successfully updated DB"); return Ok(()); } diff --git a/server-update-tool/src/github.rs b/server-update-tool/src/github.rs index 26e63a8..2323ea1 100644 --- a/server-update-tool/src/github.rs +++ b/server-update-tool/src/github.rs @@ -2,15 +2,18 @@ use std::error::Error; use url::Url; use reqwest; use reqwest::header::{ACCEPT, USER_AGENT}; -use std::collections::HashMap; use serde::{Serialize, Deserialize}; -use serde_json; +use serde_json::{self, Value}; // use mongodb::bson::DateTime; -use chrono::{DateTime}; +use chrono::{DateTime, Utc}; use mongodb::bson::oid::ObjectId; - +use reqwest::Response; use md5; use crate::db; +use futures::future::{BoxFuture, FutureExt}; +use std::{thread, time}; +use log::debug; +use more_asserts::assert_ge; #[derive(Debug, Serialize, Deserialize)] pub struct Links { @@ -49,7 +52,7 @@ fn get_path_segments_from_url(remote_url: &str) -> Result, Box Result> return Ok(path_segments[1].to_string()); } +async fn get_request_wrapper(url: &String) -> Result> { + + let sleep_duration = time::Duration::from_millis(5000); + + debug!("Attempting to make request to {}", url); + + for n in 1..3 { // we put the hack in hackathon + let response = reqwest::Client::new() + .get(url) + .header(USER_AGENT, "Frieren API") + .send() + .await?; + match response.status().as_u16() { + 403 => { + let timestamp = response.headers()["x-ratelimit-reset"].to_str()?.parse::()?; + let now = chrono::offset::Utc::now().timestamp(); + let sleep_duration = time::Duration::from_millis(((timestamp - now) * 1000).try_into().unwrap()); + debug!("Rate limiter. Sleeping {}ms", sleep_duration.as_millis()); + thread::sleep(sleep_duration); + }, + 200 => { + debug!("Successfully made request to {}", url); + return Ok(response) + } + _ => return Err("Error while querying URL".into()) + } + } + debug!("Failed to request {url}. Too many requests"); + + return Err("Rate limiter too strong".into()); // We put the hack in hackathon +} + pub async fn get_fern_file(remote_uri: &String, branch_name: Option<&String>) -> Result> { + debug!("Getting fern file"); let repo_owner = get_repo_owner_from_url(&remote_uri)?; let repo_name = get_repo_name_from_url(&remote_uri)?; @@ -74,58 +110,37 @@ pub async fn get_fern_file(remote_uri: &String, branch_name: Option<&String>) -> None => format!("https://api.github.com/repos/{}/{}/contents/open-source.fern", repo_owner, repo_name) }; - let file: GithubFile = reqwest::Client::new() - .get(github_uri) - .header(USER_AGENT, "Frieren API") - .send() + let file: GithubFile = get_request_wrapper(&github_uri) .await? .json() .await?; + debug!("Successfully got fern file"); return Ok(file); } -pub async fn get_created_at_time(remote_url: &String) -> Result> { - let repo_owner = get_repo_owner_from_url(remote_url)?; - let repo_name = get_repo_name_from_url(remote_url)?; - - let github_uri = format!("https://api.github.com/repos/{repo_owner}/{repo_name}"); - - let json: serde_json::Value = reqwest::Client::new() - .get(github_uri) - .header(USER_AGENT, "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36") - .send() - .await? - .json() - .await?; +pub async fn get_created_at_time(repo_metadata: &Value) -> Result> { + debug!("Getting created at time"); - let time_string = json[0]["created_at"].to_string(); + let time_string = repo_metadata["created_at"].to_string(); let timestamp_str = time_string.trim_matches('"'); let timestamp = DateTime::parse_from_rfc3339(timestamp_str)?; + debug!("Successfully got created at time"); return Ok(timestamp.timestamp()); } -pub async fn get_last_activity(remote_url: &String) -> Result>{ - let repo_owner = get_repo_owner_from_url(remote_url)?; - let repo_name = get_repo_name_from_url(remote_url)?; - - let github_uri = format!("https://api.github.com/repos/{repo_owner}/{repo_name}/activity?per_page=1"); - - let json: serde_json::Value = reqwest::Client::new() - .get(github_uri) - .header(USER_AGENT, "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36") - .send() - .await? - .json() - .await?; +pub async fn get_last_activity(repo_metadata: &Value) -> Result>{ + debug!("Getting created last activity time"); - let time_string = json[0]["timestamp"].to_string(); + let time_string = repo_metadata["updated_at"].to_string(); let timestamp_str = time_string.trim_matches('"'); let timestamp = DateTime::parse_from_rfc3339(timestamp_str)?; + debug!("Successfully got last activity time"); return Ok(timestamp.timestamp()); + } pub fn get_fern_hash_from_github(file: &GithubFile) -> String { @@ -139,47 +154,53 @@ pub fn is_fern_file_hash_equal(hash: &String, old_hash: &Option) -> bool } } -pub async fn get_star_count(remote_url: &String) -> Result>{ +pub async fn get_repo_metadata(remote_url: &String) -> Result> { + debug!("Getting repo metadata"); let repo_owner = get_repo_owner_from_url(remote_url)?; let repo_name = get_repo_name_from_url(remote_url)?; let uri = format!("https://api.github.com/repos/{}/{}", repo_owner, repo_name); - let json_data: serde_json::Value = reqwest::Client::new() - .get(uri) - .header(ACCEPT, "application/vnd.github+json") - .header(USER_AGENT, "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36") - .send() + let json_data: serde_json::Value = get_request_wrapper(&uri) .await? .json() .await?; - let star_count: u64 = json_data.get("subscribers_count").unwrap().as_u64().unwrap(); + return Ok(json_data); +} + +pub async fn get_star_count(repo_metadata: &Value) -> Result>{ + + debug!("Getting star count"); + + let star_count: u64 = repo_metadata.get("subscribers_count").unwrap().as_u64().unwrap(); + + debug!("Successfully got star count"); return Ok(star_count); } pub async fn get_languages(remote_url: &String) -> Result, Box>{ + debug!("Getting languages"); let repo_owner = get_repo_owner_from_url(remote_url)?; let repo_name = get_repo_name_from_url(remote_url)?; let uri = format!("https://api.github.com/repos/{}/{}/languages", repo_owner, repo_name); // let json_data: serde_json::Value = reqwest::Client::new() - let json_data: serde_json::Value = reqwest::Client::new() - .get(uri) - .header(ACCEPT, "application/vnd.github+json") - .header(USER_AGENT, "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36") - .send() + let json_data: serde_json::Value = get_request_wrapper(&uri) .await? .json() .await?; + let lang_array: &serde_json::Map = json_data.as_object().unwrap(); let langs: Vec = lang_array.keys().cloned().collect(); - + + debug!("Successfully got lanuages"); return Ok(langs); } pub async fn count_recommended_issues(remote_url: &String, recommended_issue_labels: &Vec) -> Result> { + debug!("Getting recommended issues"); let repo_owner = get_repo_owner_from_url(remote_url)?; let repo_name = get_repo_name_from_url(remote_url)?; @@ -189,16 +210,14 @@ pub async fn count_recommended_issues(remote_url: &String, recommended_issue_lab // TODO this doesn't handle duplicates let uri = format!("https://api.github.com/repos/{}/{}/issues?labels={}", repo_owner, repo_name, label); - let json_data: Vec = reqwest::Client::new() - .get(uri) - .header(ACCEPT, "application/vnd.github+json") - .header(USER_AGENT, "Frieren API") - .send() + let json_data: Vec = get_request_wrapper(&uri) .await? .json() - .await?; + .await?; ret+=json_data.len(); } + + debug!("Successfully got number of recommended issues"); return Ok(ret); } diff --git a/server-update-tool/src/main.rs b/server-update-tool/src/main.rs index 1cab655..1c7ffa9 100644 --- a/server-update-tool/src/main.rs +++ b/server-update-tool/src/main.rs @@ -7,6 +7,8 @@ use std::error::Error; use github::GithubFile; use chrono::{DateTime, Utc}; use std::collections::hash_set::HashSet; +use log::{debug}; +use env_logger; async fn handle_repo(repo: db::Repo) -> Result<(), Box> { let ref repo_origin = repo.repo_origin; @@ -14,15 +16,19 @@ async fn handle_repo(repo: db::Repo) -> Result<(), Box> { let ref mut new_repo = repo.clone(); - let last_updated = github::get_last_activity(&repo_origin).await?; + let repo_metadata = github::get_repo_metadata(&repo_origin).await?; + + debug!("Starting to handle repo {}", repo.name.unwrap()); + + let last_updated = github::get_last_activity(&repo_metadata).await?; let dt_last_updated: DateTime = DateTime::::from_timestamp(last_updated, 0).expect("invalid timestamp"); // We put the hack in hackathon - new_repo.date_created = DateTime::::from_timestamp(github::get_created_at_time(&repo_origin).await?, 0).expect("Invalid timestamp"); + new_repo.date_created = DateTime::::from_timestamp(github::get_created_at_time(&repo_metadata).await?, 0).expect("Invalid timestamp"); let file: GithubFile = match github::get_fern_file(&repo_origin, Some(&"cli".to_string())).await { Ok(_file) => _file, - Err(_) => github::get_fern_file(&repo_origin, None).await? + Err(_) => github::get_fern_file(&repo_origin, None).await? // TODO if this still fails, delete DB entry }; new_repo.hash = Some(github::get_fern_hash_from_github(&file)); @@ -50,16 +56,20 @@ async fn handle_repo(repo: db::Repo) -> Result<(), Box> { } } - let stars = github::get_star_count(&repo_origin).await?; + let stars = github::get_star_count(&repo_metadata).await?; new_repo.stars = Some(stars); new_repo.last_updated = dt_last_updated; new_repo.recommended_issues_count = Some(github::count_recommended_issues(&repo_origin, &new_repo.recommended_issue_labels.as_ref().unwrap()).await?); + db::update_repo(new_repo).await?; + return Ok(()); } #[tokio::main] async fn main() -> Result<(), Box> { + env_logger::init(); + let mut cursor = db::get_repos().await?; while cursor.advance().await? {