diff --git a/CHANGELOG.md b/CHANGELOG.md index 049b0d5..92ed7a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,22 @@ Please: --- +## JSON-LD 0.1.1.1 - Pull Request + +* Enhanced `Get-JsonLD` web request options + * Added `-SkipCertificateCheck` passthrough to `Invoke-RestMethod` + * Added `-Authentication` passthrough to `Invoke-RestMethod` + * Added `-UserAgent` passthrough to `Invoke-RestMethod` + * Added `-Headers` passthrough to `Invoke-RestMethod` +* Added `-IgnoreCache` switch to bypass cached responses and force a fresh request +* Added `Write-Verbose`, `Write-Debug`, and `Write-Warning` output throughout for improved diagnostics +* Added error handling with `try/catch` around web requests and JSON parsing +* Fixed output formatting for direct JSON-LD API responses (true `application/ld+json` endpoints) + * `@graph` document responses now emit individual graph entities rather than the wrapper object + * Direct JSON-LD object and JSON-text responses are detected and processed without requiring HTML script-tag extraction + +--- + ## JSON-LD 0.1.1 * Updating Examples (#13) diff --git a/Commands/Get-JsonLD.ps1 b/Commands/Get-JsonLD.ps1 index a7a5b71..2f9124b 100644 --- a/Commands/Get-JsonLD.ps1 +++ b/Commands/Get-JsonLD.ps1 @@ -17,15 +17,16 @@ function Get-JsonLD { jsonld https://schema.org/Movie # Get-JSONLD will output the contents of a `@Graph` object if no `@type` is found. #> - [Alias('jsonLD','json-ld')] + [Alias('jsonLD', 'json-ld')] + [CmdletBinding()] param( - # The URL that may contain JSON-LD data - [Parameter(Mandatory, ValueFromPipeline, ValueFromPipelineByPropertyName)] - [Alias('href')] - [Uri] - $Url, + # The URL that may contain JSON-LD data + [Parameter(Mandatory, ValueFromPipeline, ValueFromPipelineByPropertyName)] + [Alias('href', 'Uri')] + [Uri] + $Url, - <# + <# If set, will the output as: @@ -38,19 +39,41 @@ function Get-JsonLD { |xml|the script tag, as xml| #> - [ValidateSet('html', 'json', 'jsonld', 'ld', 'linkedData', 'script', 'xml')] - [string] - $as = 'jsonld', + [ValidateSet('html', 'json', 'jsonld', 'ld', 'linkedData', 'script', 'xml')] + [string] + $as = 'jsonld', + + # If set, bypasses certificate validation for HTTPS requests. + [switch] + $SkipCertificateCheck, + + # Authentication mechanism to pass directly to Invoke-RestMethod. + # Use a version-agnostic type here so the function can be imported on + # PowerShell versions where WebAuthenticationType is unavailable. + [ValidateSet('None', 'Basic', 'Bearer', 'OAuth')] + [string] + $Authentication, + + # User agent string to pass directly to Invoke-RestMethod. + [string] + $UserAgent, + + # Headers to pass directly to Invoke-RestMethod. + [Collections.IDictionary] + $Headers, - [switch] - $RawHtml, + # If set, ignores the cached response and forces a fresh request. + [switch] + $IgnoreCache, - # If set, will force the request to be made even if the URL has already been cached. - [switch] - $Force + # If set, will force the request to be made even if the URL has already been cached. + [switch] + $Force ) begin { + Write-Verbose "Initializing Get-JsonLD" + # Create a pattern to match the JSON-LD script tag $linkedDataRegex = [Regex]::new(@' (? @@ -64,115 +87,250 @@ application/ld\+json # The type that indicates linked d \> # Match the end of the start tag (?(?:.|\s){0,}?(?=\z|)) # Anything until the end tag is JSONContent ) -'@, 'IgnoreCase,IgnorePatternWhitespace','00:00:00.1') +'@, 'IgnoreCase,IgnorePatternWhitespace', '00:00:00.1') # Initialize the cache for JSON-LD requests if (-not $script:Cache) { $script:Cache = [Ordered]@{} + Write-Debug "Initialized JSON-LD cache store" } filter output { $in = $_ - $mySelf = $MyInvocation.MyCommand + $context = $null + $shouldOutput = $true if ($in.'@context' -is [string]) { - $context = $in.'@context' + $context = $in.'@context' } if ($in.'@graph') { - if ($in.pstypenames -ne 'application/ld+json') { - $in.pstypenames.insert(0,'application/ld+json') + if ($in.pstypenames -notcontains 'application/ld+json') { + $in.pstypenames.insert(0, 'application/ld+json') } foreach ($graphObject in $in.'@graph') { - $null = $graphObject | - & $mySelf + $graphObject | output } + # Emit graph entries instead of the wrapper document. + $shouldOutput = $false } elseif ($in.'@type') { $typeName = if ($context) { $context, $in.'@type' -join '/' - } else { + } + else { $in.'@type' } - if ($in.pstypenames -ne 'application/ld+json') { - $in.pstypenames.insert(0,'application/ld+json') + if ($in.pstypenames -notcontains 'application/ld+json') { + $in.pstypenames.insert(0, 'application/ld+json') } - if ($in.pstypenames -ne $typeName) { - $in.pstypenames.insert(0,$typeName) + if ($in.pstypenames -notcontains $typeName) { + $in.pstypenames.insert(0, $typeName) } foreach ($property in $in.psobject.properties) { if ($property.value.'@type') { - $null = $property.value | - & $mySelf - } - } + $null = $property.value | output + } + } + } + + if ($shouldOutput) { + $in } - $in } $foreachFile = { $inFile = $_.FullName try { + Write-Verbose "Reading JSON-LD from file: $inFile" Get-Content -LiteralPath $_.FullName -Raw | - ConvertFrom-Json | - output - } catch { - Write-Verbose "$($inFile.FullName) : $_" + ConvertFrom-Json | + output + } + catch { + Write-Warning "Could not parse JSON-LD content from file: $inFile" + Write-Debug "File parse error for '$inFile': $($_.Exception.Message)" } } } process { + Write-Verbose "Processing URL: $Url" + + $isJsonLdObject = { + param($InputObject) + if (-not $InputObject) { return $false } + + $propertyNames = @($InputObject.psobject.properties.Name) + if ( + ($propertyNames -contains '@context') -or + ($propertyNames -contains '@type') -or + ($propertyNames -contains '@graph') + ) { + return $true + } + if ( + $InputObject -is [System.Collections.IEnumerable] -and + $InputObject -isnot [string] + ) { + foreach ($item in $InputObject) { + if (-not $item) { continue } + $itemPropertyNames = @($item.psobject.properties.Name) + if ( + ($itemPropertyNames -contains '@context') -or + ($itemPropertyNames -contains '@type') -or + ($itemPropertyNames -contains '@graph') + ) { + return $true + } + } + } + return $false + } + if ($url.IsFile -or -not $url.AbsoluteUri ) { if (Test-Path $url.OriginalString) { + Write-Verbose "Reading JSON-LD from local path: $($url.OriginalString)" Get-ChildItem $url.OriginalString -File | - Foreach-Object $foreachFile - } elseif ($MyInvocation.MyCommand.Module -and + Foreach-Object $foreachFile + } + elseif ($MyInvocation.MyCommand.Module -and (Test-Path ( Join-Path ( $MyInvocation.MyCommand.Module | Split-Path ) $url.OriginalString )) ) { + Write-Verbose "Reading JSON-LD from module-relative path: $($url.OriginalString)" Get-ChildItem -Path ( Join-Path ( $MyInvocation.MyCommand.Module | Split-Path ) $url.OriginalString ) -File | - Foreach-Object $foreachFile + Foreach-Object $foreachFile + } + else { + Write-Warning "Path not found for URL/file input: $($url.OriginalString)" } return } - $restResponse = - if ($Force -or -not $script:Cache[$url]) { - $script:Cache[$url] = Invoke-RestMethod -Uri $Url + $invokeRestMethodSplat = @{ Uri = $Url } + if ($PSBoundParameters.ContainsKey('SkipCertificateCheck')) { + $invokeRestMethodSplat.SkipCertificateCheck = $SkipCertificateCheck + } + if ($PSBoundParameters.ContainsKey('Authentication')) { + $invokeRestMethodSplat.Authentication = $Authentication + } + if ($PSBoundParameters.ContainsKey('UserAgent')) { + $invokeRestMethodSplat.UserAgent = $UserAgent + } + if ($PSBoundParameters.ContainsKey('Headers')) { + $invokeRestMethodSplat.Headers = $Headers + } + + Write-Debug ("Invoke-RestMethod parameter keys: {0}" -f (($invokeRestMethodSplat.Keys | Sort-Object) -join ', ')) + + try { + $restResponse = + if ($Force -or $IgnoreCache -or -not $script:Cache[$url]) { + Write-Verbose "Fetching fresh response from remote URL" + $script:Cache[$url] = Invoke-RestMethod @invokeRestMethodSplat $script:Cache[$url] - } else { + } + else { + Write-Verbose "Using cached response" $script:Cache[$url] } + } + catch { + Write-Error -Message "Failed to retrieve JSON-LD from '$Url'. $($_.Exception.Message)" -Exception $_.Exception -Category $_.CategoryInfo.Category -TargetObject $_.TargetObject + return + } if ($as -eq 'html') { + Write-Debug "Returning raw HTML response" return $restResponse - } + } + + $emitDirectJsonLdResponse = { + param( + [Parameter(Mandatory)] + $JsonLdResponse + ) + + if ($As -eq 'xml') { + Write-Warning "XML output is not available for direct JSON-LD API responses; returning JSON instead" + return $JsonLdResponse | ConvertTo-Json -Depth 100 + } + + if ($As -eq 'script') { + Write-Warning "Script output is not available for direct JSON-LD API responses; returning JSON instead" + return $JsonLdResponse | ConvertTo-Json -Depth 100 + } + + if ($As -eq 'json') { + return $JsonLdResponse | ConvertTo-Json -Depth 100 + } + + foreach ($jsonObject in @($JsonLdResponse)) { + if ($jsonObject.'@type' -or $jsonObject.'@graph') { + $jsonObject | output + } + else { + $jsonObject + } + } + } + + # Handle API responses where the body is already JSON-LD (not embedded in HTML). + if (& $isJsonLdObject $restResponse) { + Write-Verbose "Detected direct JSON-LD object response" + & $emitDirectJsonLdResponse $restResponse + return + } + + # Some servers return JSON-LD as plain text; try parsing it before HTML script-tag extraction. + if ($restResponse -is [string]) { + try { + $parsedJson = $restResponse | ConvertFrom-Json -ErrorAction Stop + if (& $isJsonLdObject $parsedJson) { + Write-Verbose "Detected direct JSON-LD text response" + & $emitDirectJsonLdResponse $parsedJson + return + } + } + catch { + Write-Debug "Response was not parseable as direct JSON-LD text; trying HTML script-tag extraction" + } + } # Find all linked data tags within the response - foreach ($match in $linkedDataRegex.Matches("$restResponse")) { + $linkedDataMatches = $linkedDataRegex.Matches("$restResponse") + Write-Debug "Linked data script tags found: $($linkedDataMatches.Count)" + + if (-not $linkedDataMatches.Count) { + Write-Warning "No JSON-LD script tags were found in response from '$Url'" + } + + foreach ($match in $linkedDataMatches) { # If we want the result as xml if ($As -eq 'xml') { # try to cast it - $matchXml ="$match" -as [xml] + $matchXml = "$match" -as [xml] if ($matchXml) { # and output it if found. $matchXml continue - } else { + } + else { # otherwise, fall back to the `