Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 49 additions & 18 deletions .github/actions/run_event_validation/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,19 @@ author: Tomasz Nazarewicz
inputs:
tag:
description: "Version of the spec to check against"
required: true
release:
default: ""
required: false
release_tags:
description: "Versions of the spec to check against"
required: false
ol_release:
description: "release to run the validation with"
required: true
default: ""
required: false
component_release:
description: "release of the component producing events"
default: ""
required: false
target-path:
description: "Path to save the report to"
required: true
Expand All @@ -31,35 +40,57 @@ outputs:
runs:
using: "composite"
steps:
- name: Set up Python 3.11
uses: actions/setup-python@v3
with:
python-version: "3.11"

- name: create dir for ol spec and report
id: dir_for_spec
- name: create necessary dirs
id: create_dirs
shell: bash
run: |
mkdir -p spec
mkdir -p specs
mkdir -p tmp
mkdir -p report

- name: get latest OL spec
- name: get OpenlineageCode
uses: actions/checkout@v4
with:
repository: OpenLineage/OpenLineage
ref: ${{ inputs.tag }}
path: spec
sparse-checkout: |
spec/
path: tmp

- name: Get spec for each tag
  shell: bash
  env:
    # Passed through the environment instead of being interpolated into the
    # script text, so the input value is never parsed as shell code.
    RELEASE_TAGS: ${{ inputs.release_tags }}
  run: |
    # For every tag in the comma-separated release_tags input, check the tag
    # out in ./tmp and copy its spec JSON files (flat) into ./specs/<tag>/.
    cd tmp
    # The set of remote tags does not change while looping, so fetch it once.
    git fetch --tags --quiet
    IFS=',' read -ra TAGS <<< "$RELEASE_TAGS"
    for TAG in "${TAGS[@]}"; do
      echo "Checking out tag: $TAG"
      if git checkout --quiet "$TAG"; then
        DEST_DIR="../specs/$TAG"
        if [ -d "spec" ]; then
          mkdir -p "$DEST_DIR"
          # Only the core schema and the facet schemas are needed for validation.
          # (The former "-path './website' -prune" could never match a path
          # produced by "find spec", so it has been dropped.)
          find spec -type f \( -name '*Facet.json' -o -name 'OpenLineage.json' \) -exec cp {} "$DEST_DIR/" \;
          echo "success"
        else
          echo "Spec directory not found in $TAG"
        fi
      else
        # Best effort: a missing tag is reported and the remaining tags are still processed.
        echo "Tag $TAG not found!"
      fi
    done
    cd ..

- name: Set up Python 3.11
  # v5 is the maintained major version; v3 targets a deprecated Node runtime.
  uses: actions/setup-python@v5
  with:
    # Quoted so the version is read as a string, not a float.
    python-version: "3.11"

- name: Validate OpenLineage events
shell: bash
run: |
pip install -r ./scripts/requirements.txt
python scripts/validate_ol_events.py \
--event_base_dir=${{ inputs.event-directory }} \
--spec_dirs=spec/spec/,spec/spec/facets/,spec/spec/registry/gcp/dataproc/facets,spec/spec/registry/gcp/lineage/facets \
--spec_base_dir=specs \
--target=${{ inputs.target-path }} \
--component="${{ inputs.component }}" \
--producer_dir=${{ inputs.producer-dir }} \
--release=${{ inputs.release }}
--openlineage_version=${{ inputs.release_tags }} \
--component_version=${{ inputs.component_release }}
25 changes: 23 additions & 2 deletions .github/workflows/check_scenarios.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,36 @@ jobs:
- name: Checkout code
uses: actions/checkout@v4

- name: get scenarios' OL versions
  id: get_versions
  run: |
    # Collect the distinct `openlineage_version` values declared in the
    # scenarios' config.json files and publish them as the comma-separated
    # `versions` step output.
    SCENARIO_DIR=./consumer/scenarios
    if [ ! -d "$SCENARIO_DIR" ]; then
      echo "Scenario directory '$SCENARIO_DIR' not found!"
      exit 1
    fi

    VERSIONS=()
    for scenario in "$SCENARIO_DIR"/*; do
      if [ -d "$scenario" ] && [ -f "$scenario/config.json" ]; then
        # jq's stderr is no longer discarded: the step already stopped on a
        # jq failure (the default shell runs with -e), but without saying why.
        if ! VERSION=$(jq -r '.openlineage_version' "$scenario/config.json"); then
          echo "Failed to read openlineage_version from '$scenario/config.json'" >&2
          exit 1
        fi
        # jq -r prints the literal string "null" when the key is absent.
        if [ -n "$VERSION" ] && [ "$VERSION" != "null" ]; then
          VERSIONS+=("$VERSION")
        fi
      fi
    done

    # Deduplicate and join with commas in one pipeline, avoiding the word
    # splitting and globbing of an unquoted $(...) array assignment.
    UNIQUE_VERSIONS=$(printf '%s\n' "${VERSIONS[@]}" | sort -u | paste -sd, -)
    echo "versions=${UNIQUE_VERSIONS}" >> "$GITHUB_OUTPUT"

- name: Create report.json directory
  # Make sure reports/ exists (no error if it is already present).
  run: |
    mkdir -p reports/

- name: Validation
uses: ./.github/actions/run_event_validation
with:
component: 'scenarios'
tag: ${{ inputs.get-latest-snapshots == 'true' && 'main' || inputs.release }}
release: ${{ inputs.release }}
release_tags: ${{ steps.get_versions.outputs.versions }}
target-path: 'reports/scenarios-report.json'
event-directory: './consumer/scenarios'
producer-dir: './consumer'
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/collect_and_compare_reports.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
ref: refs/heads/main
# with:
# ref: refs/heads/main

- uses: actions/upload-artifact@v4
with:
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/consumer_dataplex.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ jobs:
--consumer_dir consumer/consumers/dataplex \
--scenario_dir consumer/scenarios/ \
--parent projects/gcp-open-lineage-testing/locations/us \
--release ${{ inputs.release }}
--release ${{ inputs.release }} \
--target dataplex-report.json

- uses: actions/upload-artifact@v4
with:
Expand Down
23 changes: 1 addition & 22 deletions .github/workflows/main_new_release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,28 +44,7 @@ jobs:
# Normally a new OL release should also trigger all producer tests, but for now they are run anyway, so there is no need to trigger them here.
- name: Select components to run
id: select-components
run: |
# assuming the version will not exceed 1000 this is the quickest way to get comparable values
version_sum() {
IFS='.' read -r var1 var2 var3 <<< "$1"
echo $(( var1 * 1000000 + var2 * 1000 ))
}

current_ol=$(cat generated-files/releases.json | jq -c '.[] | select(.name | contains("openlineage")) | .latest_version ' -r)
latest_ol=$(curl https://api.github.com/repos/OpenLineage/OpenLineage/releases/latest -s | jq .tag_name -r)

sum1=$(version_sum "$latest_ol")
sum2=$(version_sum "$current_ol")

if (( $(version_sum $latest_ol) > $(version_sum $current_ol) )); then
echo "ol_release=${latest_ol}" >> $GITHUB_OUTPUT
echo "releases_updated=true" >> $GITHUB_OUTPUT
jq --arg latest_ol "$latest_ol" 'map(if .name == "openlineage" then .latest_version = $latest_ol else . end)' \
generated-files/releases.json > generated-files/updated-releases.json
else
echo "ol_release=${current_ol}" >> $GITHUB_OUTPUT
fi

run: ./scripts/select_components.sh
- uses: actions/upload-artifact@v4
if: steps.select-components.outputs.releases_updated == 'true'
with:
Expand Down
47 changes: 34 additions & 13 deletions .github/workflows/main_pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ jobs:
run_spark_dataproc: ${{ steps.get-changed.outputs.spark_dataproc_changed }}
ol_release: ${{ steps.get-release.outputs.openlineage_release }}
any_run: ${{ steps.get-changed.outputs.any_changed }}
test_matrix: ${{ steps.set-matrix-values.outputs.spark_dataproc_matrix }}
steps:
- name: Checkout code
uses: actions/checkout@v4
Expand Down Expand Up @@ -64,7 +65,24 @@ jobs:
run: |
echo " any changed value is ${{ steps.get-changed.outputs.any_changed }}"
openlineage_release=$(cat generated-files/releases.json | jq -c '.[] | select(.name | contains("openlineage")) | .latest_version ' -r)
echo "openlineage_release=${openlineage_release}" >> $GITHUB_OUTPUT
echo "openlineage_release=${openlineage_release}" >> $GITHUB_OUTPUT
- name: set-matrix-values
  id: set-matrix-values
  run: |
    # Print the content of ./producer/<producer>/versions.json as compact JSON.
    #   $1 - producer directory name (e.g. spark_dataproc)
    # Returns 1 and writes a message to stderr when the file does not exist.
    check_producer() {
      local producer="$1"
      local file="./producer/${producer}/versions.json"

      if [[ ! -f "$file" ]]; then
        echo "Error: File '$file' does not exist." >&2
        return 1
      fi
      jq -c . "$file"
    }

    # Assign first, then write. When "$(...)" is used directly as an echo
    # argument its exit status is ignored, so a missing or invalid versions.json
    # would silently publish an empty matrix instead of failing this step.
    spark_dataproc_matrix=$(check_producer spark_dataproc)
    echo "spark_dataproc_matrix=${spark_dataproc_matrix}" >> "$GITHUB_OUTPUT"

# echo "myoutput=$(jq -cn --argjson environments "$TARGETS" '{target: $environments}')" >> $GITHUB_OUTPUT

######## COMPONENT VALIDATION ########

Expand All @@ -87,17 +105,20 @@ jobs:
with:
release: ${{ needs.initialize_workflow.outputs.ol_release }}

spark_dataproc:
needs: initialize_workflow
if: ${{ needs.initialize_workflow.outputs.run_spark_dataproc == 'true' }}
uses: ./.github/workflows/producer_spark_dataproc.yml
secrets:
gcpKey: ${{ secrets.GCP_SA_KEY }}
postgresqlUser: ${{ secrets.POSTGRESQL_USER }}
postgresqlPassword: ${{ secrets.POSTGRESQL_PASSWORD }}
with:
release: ${{ needs.initialize_workflow.outputs.ol_release }}
get-latest-snapshots: 'false'
# spark_dataproc:
# needs: initialize_workflow
# if: ${{ needs.initialize_workflow.outputs.run_spark_dataproc == 'true' }}
# uses: ./.github/workflows/producer_spark_dataproc.yml
# strategy:
# matrix: ${{ fromJson(needs.initialize_workflow.outputs.test_matrix) }}
# secrets:
# gcpKey: ${{ secrets.GCP_SA_KEY }}
# postgresqlUser: ${{ secrets.POSTGRESQL_USER }}
# postgresqlPassword: ${{ secrets.POSTGRESQL_PASSWORD }}
# with:
# ol_release: ${{ matrix.openlineage_versions }}
# spark_release: ${{ matrix.component_version }}
# get-latest-snapshots: 'false'

######## COLLECTION OF REPORTS AND EXECUTE APPROPRIATE ACTIONS ########

Expand All @@ -106,7 +127,7 @@ jobs:
- initialize_workflow
- scenarios
- dataplex
- spark_dataproc
# - spark_dataproc
if: ${{ !failure() && needs.initialize_workflow.outputs.any_run == 'true'}}
uses: ./.github/workflows/collect_and_compare_reports.yml
with:
Expand Down
Loading