From ced1480c97ebb3ab3238eb19232aa5079260a8d0 Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Wed, 25 Sep 2024 18:33:15 +0800 Subject: [PATCH 1/5] Add hyperlinks and paths validation. Signed-off-by: ZePan110 --- .github/workflows/pr-path-detection.yml | 151 ++++++++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 .github/workflows/pr-path-detection.yml diff --git a/.github/workflows/pr-path-detection.yml b/.github/workflows/pr-path-detection.yml new file mode 100644 index 00000000..9d12c346 --- /dev/null +++ b/.github/workflows/pr-path-detection.yml @@ -0,0 +1,151 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +name: Check Paths and Hyperlinks + +on: + pull_request: + branches: [main] + types: [opened, reopened, ready_for_review, synchronize] + +jobs: + check-the-validity-of-hyperlinks-in-README: + runs-on: ubuntu-latest + steps: + - name: Clean Up Working Directory + run: sudo rm -rf ${{github.workspace}}/* + + - name: Checkout Repo docs + uses: actions/checkout@v4 + + - name: Check the Validity of Hyperlinks + run: | + cd ${{github.workspace}} + fail="FALSE" + url_lines=$(grep -Eo '\]\(http[s]?://[^)]+\)' --include='*.md' -r .|grep -Ev 'linkedin') + if [ -n "$url_lines" ]; then + for url_line in $url_lines; do + url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//') + path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-) + if [[ "https://intel.sharepoint.com/:v:/s/mlconsultingandsupport/EZa7vjON10ZCpMvE7U-SPMwBRXbVHqe1Ybsa-fmnXayNUA?e=f6FPsl" == "$url" || "https://intel.sharepoint.com/:v:/s/mlconsultingandsupport/ESMIcBseFTdIuqkoB7TZy6ABfwR9CkfV49TvTa1X_Jihkg?e=zMH7O7" == "$url" ]]; then + echo "Link "$url" from ${{github.workspace}}/$path need to be verified by a real person." 
+ else + response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url") + if [ "$response" -ne 200 ]; then + echo "**********Validation failed, status code: $response, try again**********" + response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url") + if [ "$response_retry" -eq 200 ]; then + echo "*****Retry successful*****" + else + urls_line+=("$url_line") + echo "Status code: $response_retry, Link $url validation failed, will retry later." + fi + fi + fi + done + fi + echo "**************Start Retry**************" + for link in "${urls_line[@]}"; do + url=$(echo "$link"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//') + path=$(echo "$link"|cut -d':' -f1 | cut -d'/' -f2-) + attempt_num=1 + while [ $attempt_num -le 5 ]; do + do_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url") + if [ "$do_retry" -eq 200 ]; then + echo "$url Retry successful" + break + else + echo "$url Validation failed, retrying..." + ((attempt_num++)) + sleep 10 + fi + done + + if [ $attempt_num -gt 5 ]; then + echo "Invalid link from ${{github.workspace}}/$path: $url status code: $do_retry" + fail="TRUE" + fi + done + + if [[ "$fail" == "TRUE" ]]; then + exit 1 + else + echo "All hyperlinks are valid." 
+ fi + shell: bash + + check-the-validity-of-relative-path: + runs-on: ubuntu-latest + steps: + - name: Clean up Working Directory + run: sudo rm -rf ${{github.workspace}}/* + + - name: Checkout Repo docs + uses: actions/checkout@v4 + + - name: Checking Relative Path Validity + run: | + cd ${{github.workspace}} + fail="FALSE" + repo_name=${{ github.event.pull_request.head.repo.full_name }} + if [ "$(echo "$repo_name"|cut -d'/' -f1)" != "opea-project" ]; then + owner=$(echo "${{ github.event.pull_request.head.repo.full_name }}" |cut -d'/' -f1) + branch="https://github.com/$owner/docs/tree/${{ github.event.pull_request.head.ref }}" + else + branch="https://github.com/opea-project/docs/blob/${{ github.event.pull_request.head.ref }}" + fi + link_head="https://github.com/opea-project/docs/blob/main" + IFS=$'\n' + png_lines=$(grep -Eo '\]\([^)]+\)' --include='*.md' -r .|grep -Ev 'http'|grep -Ev 'mailto') + if [ -n "$png_lines" ]; then + for png_line in $png_lines; do + refer_path=$(echo "$png_line"|cut -d':' -f1 | cut -d'/' -f2-) + png_path=$(echo "$png_line"|cut -d '(' -f2 | cut -d ')' -f1) + if [[ "${png_path:0:1}" == "/" ]]; then + check_path=${{github.workspace}}$png_path + elif [[ "${png_path:0:1}" == "#" ]]; then + check_path=${{github.workspace}}/$refer_path$png_path + else + check_path=${{github.workspace}}/$(dirname "$refer_path")/$png_path + fi + real_path=$(realpath $check_path) + if [ $? -ne 0 ]; then + echo "Path $png_path in file ${{github.workspace}}/$refer_path does not exist" + fail="TRUE" + else + url=$link_head$(echo "$real_path" | sed 's|.*/docs||') + response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url") + if [ "$response" -ne 200 ]; then + echo "**********Validation failed, status code: $response try again**********" + response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url") + if [ "$response_retry" -eq 200 ]; then + echo "*****Retry successful*****" + else + echo "Retry failed. 
Check branch ${{ github.event.pull_request.head.ref }}" + url_dev=$branch$(echo "$real_path" | sed 's|.*/docs||') + response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url_dev") + if [ "$response" -ne 200 ]; then + echo "**********Validation failed, status code: $response_retry, try again**********" + response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url_dev") + if [ "$response_retry" -eq 200 ]; then + echo "*****Retry successful*****" + else + echo "Invalid path from ${{github.workspace}}/$refer_path: $png_path status code: $response_retry" + echo "$png_line" + fail="TRUE" + fi + else + echo "Check branch ${{ github.event.pull_request.head.ref }} successfully." + fi + fi + fi + fi + done + fi + IFS=$OLDIFS + if [[ "$fail" == "TRUE" ]]; then + exit 1 + else + echo "All relative links valid." + fi + shell: bash From 3f0d88c1a2d3251c58fe3c7220b685bab675beb0 Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Wed, 25 Sep 2024 18:38:21 +0800 Subject: [PATCH 2/5] Fix path errors. Signed-off-by: ZePan110 --- community/CONTRIBUTING.md | 2 +- community/rfcs/24-05-16-OPEA-001-Overall-Design.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/community/CONTRIBUTING.md b/community/CONTRIBUTING.md index 7da855f3..37e230ae 100644 --- a/community/CONTRIBUTING.md +++ b/community/CONTRIBUTING.md @@ -370,7 +370,7 @@ The OPEA projects use GitHub Action for CI test. - End to End Test, the PR must pass all end to end tests. #### Pull Request Review -You can add reviewers from [the code owners list](../codeowner.md) to your PR. +You can add reviewers from [the code owners list](./codeowner.md) to your PR. 
## Support diff --git a/community/rfcs/24-05-16-OPEA-001-Overall-Design.md b/community/rfcs/24-05-16-OPEA-001-Overall-Design.md index 7dff6306..f227a2fa 100644 --- a/community/rfcs/24-05-16-OPEA-001-Overall-Design.md +++ b/community/rfcs/24-05-16-OPEA-001-Overall-Design.md @@ -38,7 +38,7 @@ This RFC is used to present the OPEA overall design philosophy, including overal The proposed overall architecture is -![OPEA Architecture](opea_architecture.png "OPEA Architecture") +![OPEA Architecture](opea_architecture.png) 1. GenAIComps @@ -58,7 +58,7 @@ The proposed overall architecture is The proposed OPEA workflow is -![OPEA Workflow](opea_workflow.png "OPEA Workflow") +![OPEA Workflow](opea_workflow.png) 1. Microservice From 2ed0f8ed2237221390a1d6713fcd6c314d99c808 Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Wed, 9 Oct 2024 00:02:51 +0800 Subject: [PATCH 3/5] Optimize path and link validity check Signed-off-by: ZePan110 --- .github/workflows/pr-path-detection.yml | 94 +++++++++++++------------ 1 file changed, 50 insertions(+), 44 deletions(-) diff --git a/.github/workflows/pr-path-detection.yml b/.github/workflows/pr-path-detection.yml index 9d12c346..f27aa5c9 100644 --- a/.github/workflows/pr-path-detection.yml +++ b/.github/workflows/pr-path-detection.yml @@ -17,30 +17,35 @@ jobs: - name: Checkout Repo docs uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Check the Validity of Hyperlinks run: | cd ${{github.workspace}} fail="FALSE" - url_lines=$(grep -Eo '\]\(http[s]?://[^)]+\)' --include='*.md' -r .|grep -Ev 'linkedin') - if [ -n "$url_lines" ]; then - for url_line in $url_lines; do - url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//') - path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-) - if [[ "https://intel.sharepoint.com/:v:/s/mlconsultingandsupport/EZa7vjON10ZCpMvE7U-SPMwBRXbVHqe1Ybsa-fmnXayNUA?e=f6FPsl" == "$url" || 
"https://intel.sharepoint.com/:v:/s/mlconsultingandsupport/ESMIcBseFTdIuqkoB7TZy6ABfwR9CkfV49TvTa1X_Jihkg?e=zMH7O7" == "$url" ]]; then - echo "Link "$url" from ${{github.workspace}}/$path need to be verified by a real person." - else - response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url") - if [ "$response" -ne 200 ]; then - echo "**********Validation failed, status code: $response, try again**********" - response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url") - if [ "$response_retry" -eq 200 ]; then - echo "*****Retry successful*****" - else - urls_line+=("$url_line") - echo "Status code: $response_retry, Link $url validation failed, will retry later." + # url_lines=$(grep -Eo '\]\(http[s]?://[^)]+\)' --include='*.md' -r .|grep -Ev 'linkedin') + merged_commit=$(git log -1 --format='%H') + changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')" + if [ -n "$changed_files" ]; then + for changed_file in $changed_files; do + url_lines=$(grep -H -Eo '\]\(http[s]?://[^)]+\)' "$changed_file" | grep -Ev 'linkedin' || true) # a changed file with no external links must not abort the step under bash -e + if [ -n "$url_lines" ]; then + for url_line in $url_lines; do + url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//') + path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-) + response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url") + if [ "$response" -ne 200 ]; then + echo "**********Validation failed, status code: $response, try again**********" + response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url") + if [ "$response_retry" -eq 200 ]; then + echo "*****Retry successful*****" + else + urls_line+=("$url_line") + echo "Status code: $response_retry, Link $url validation failed, will retry later." 
+ fi fi - fi + done fi done fi @@ -82,6 +87,8 @@ jobs: - name: Checkout Repo docs uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Checking Relative Path Validity run: | @@ -95,54 +102,53 @@ jobs: branch="https://github.com/opea-project/docs/blob/${{ github.event.pull_request.head.ref }}" fi link_head="https://github.com/opea-project/docs/blob/main" - IFS=$'\n' + merged_commit=$(git log -1 --format='%H') + changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')" png_lines=$(grep -Eo '\]\([^)]+\)' --include='*.md' -r .|grep -Ev 'http'|grep -Ev 'mailto') if [ -n "$png_lines" ]; then for png_line in $png_lines; do refer_path=$(echo "$png_line"|cut -d':' -f1 | cut -d'/' -f2-) png_path=$(echo "$png_line"|cut -d '(' -f2 | cut -d ')' -f1) if [[ "${png_path:0:1}" == "/" ]]; then - check_path=${{github.workspace}}$png_path - elif [[ "${png_path:0:1}" == "#" ]]; then - check_path=${{github.workspace}}/$refer_path$png_path + check_path=$png_path + elif [[ "$png_path" == *#* ]]; then + relative_path=$(echo "$png_path" | cut -d '#' -f1) + if [ -n "$relative_path" ]; then + check_path=$(dirname "$refer_path")/$relative_path + png_path=$(echo "$png_path" | awk -F'#' '{print "#" $2}') + else + check_path=$refer_path + fi else - check_path=${{github.workspace}}/$(dirname "$refer_path")/$png_path + check_path=$(dirname "$refer_path")/$png_path fi - real_path=$(realpath $check_path) - if [ $? 
-ne 0 ]; then - echo "Path $png_path in file ${{github.workspace}}/$refer_path does not exist" - fail="TRUE" - else - url=$link_head$(echo "$real_path" | sed 's|.*/docs||') - response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url") - if [ "$response" -ne 200 ]; then - echo "**********Validation failed, status code: $response try again**********" - response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url") - if [ "$response_retry" -eq 200 ]; then - echo "*****Retry successful*****" - else - echo "Retry failed. Check branch ${{ github.event.pull_request.head.ref }}" - url_dev=$branch$(echo "$real_path" | sed 's|.*/docs||') + if [ -e "$check_path" ]; then + real_path=$(realpath "$check_path") + if [[ "$png_line" == *#* ]]; then + if [ -n "$changed_files" ] && grep -qxF -- "$refer_path" <<<"$changed_files"; then # exact, literal match of the referring file against the changed-file list + url_dev=$branch$(echo "$real_path" | sed 's|.*/docs||')$png_path response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url_dev") if [ "$response" -ne 200 ]; then - echo "**********Validation failed, status code: $response_retry, try again**********" + echo "**********Validation failed, try again**********" response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url_dev") if [ "$response_retry" -eq 200 ]; then - echo "*****Retry successful*****" + echo "*****Retry successfully*****" else - echo "Invalid path from ${{github.workspace}}/$refer_path: $png_path status code: $response_retry" - echo "$png_line" + echo "Invalid path from ${{github.workspace}}/$refer_path: $png_path, link: $url_dev" fail="TRUE" fi else - echo "Check branch ${{ github.event.pull_request.head.ref }} successfully." 
+ echo "Validation succeed $png_line" fi fi fi + else + echo "$check_path does not exist" + fail="TRUE" fi done fi - IFS=$OLDIFS + if [[ "$fail" == "TRUE" ]]; then exit 1 else From 098446525e8394f0ac0cbf511b423ef9ad6c88d5 Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Wed, 9 Oct 2024 00:22:36 +0800 Subject: [PATCH 4/5] debug Signed-off-by: ZePan110 --- .github/workflows/pr-path-detection.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-path-detection.yml b/.github/workflows/pr-path-detection.yml index f27aa5c9..0a28a8d1 100644 --- a/.github/workflows/pr-path-detection.yml +++ b/.github/workflows/pr-path-detection.yml @@ -143,7 +143,7 @@ jobs: fi fi else - echo "$check_path does not exist" + echo "$check_path does not exist $png_line" fail="TRUE" fi done From bc2b92876af3a0c9a3d7c39cf84d45c2efbb91eb Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Wed, 9 Oct 2024 00:25:21 +0800 Subject: [PATCH 5/5] Fix issue Signed-off-by: ZePan110 --- .github/workflows/pr-path-detection.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-path-detection.yml b/.github/workflows/pr-path-detection.yml index 0a28a8d1..11157a9b 100644 --- a/.github/workflows/pr-path-detection.yml +++ b/.github/workflows/pr-path-detection.yml @@ -110,7 +110,7 @@ jobs: refer_path=$(echo "$png_line"|cut -d':' -f1 | cut -d'/' -f2-) png_path=$(echo "$png_line"|cut -d '(' -f2 | cut -d ')' -f1) if [[ "${png_path:0:1}" == "/" ]]; then - check_path=$png_path + check_path="."$(echo "$png_path" | cut -d '#' -f1) elif [[ "$png_path" == *#* ]]; then relative_path=$(echo "$png_path" | cut -d '#' -f1) if [ -n "$relative_path" ]; then