Module sws.redirects

Provides a function for tracing redirects

Examples

Trace the redirects from kieranwood.ca

from sws.redirects import trace

trace('kieranwood.ca', print_result = True) '''Prints:

Printing trace for http://kieranwood.ca

Redirect level:1
URL: http://kieranwood.ca/
HTTP Code: 301

Redirect level:2
URL: https://kieranwood.ca
HTTP Code: 200'''
Expand source code
"""Provides a function for tracing redirects

Examples
--------
### Trace the redirects from kieranwood.ca
```
from sws.redirects import trace

trace('kieranwood.ca', print_result = True) '''Prints:

Printing trace for http://kieranwood.ca

Redirect level:1
URL: http://kieranwood.ca/
HTTP Code: 301

Redirect level:2
URL: https://kieranwood.ca
HTTP Code: 200'''
```
"""

# Standard library Dependencies
import logging                  # Used for logging
from typing import Union        # Used for type hints with multiple types

# External Dependencies
import requests                 # Used to make http requests for redirect tracing


def trace(url: str, ignored_domains: Union[list, bool], print_result: bool = True) -> list:
    """Trace all redirects associated with a URL.

    Arguments
    ---------
    url : str
        The URL to trace, can include or not include a protocol

    ignored_domains : list[str] or bool
        A list of domains (without protocols) to ignore in the trace; False
        can be passed in if no domains should be ignored

    print_result : bool
        If true then the value will be printed in a human readable format

    Notes
    -----
    url argument can include or not include a protocol

    Raises
    ------
    ValueError:
        If url cannot be connected to a ValueError will be raised

    Returns
    -------
    list[responses]:
        The list of traced responses

    Examples
    --------
    Trace the redirects from kieranwood.ca
    ```
    from sws.redirects import trace

    trace('kieranwood.ca', print_result = True) '''Prints:

    Printing trace for http://kieranwood.ca

    Redirect level:1
    URL: http://kieranwood.ca/
    HTTP Code: 301

    Redirect level:2
    URL: https://kieranwood.ca
    HTTP Code: 200'''
    ```
    """
    logging.info(f"Entering trace(url={url}, ignored_domains={ignored_domains}, print_result={print_result})")

    logging.info(f"Checking protocol is present on {url}")
    # Checks if protocols are present
    if "https://" in url:
        ...  # Continue
    elif "http://" in url:
        ...  # Continue
    else:  # Add a protocol to URL
        url = "http://" + url
        logging.info(f"Changed url to {url}")

    # Try going to the provided URL
    logging.info("Starting HTTP request")
    try:
        response = requests.get(url)

    except requests.exceptions.ConnectionError:
        if print_result:
            print(f"Could not connect to {url}, please ensure there are no spelling mistakes")
        raise ValueError(f"Could not connect to {url}, please ensure there are no spelling mistakes")
    except Exception as identifier:
        if print_result:
            print(f"Error while checking {url} \nError Code: {identifier}")
        return [f"Error while checking {url} \nError Code: {identifier}"]

    output = []  # The result of the response
    if response.history:  # If the request was redirected
        if ignored_domains:
            logging.debug("Skipping ignored domains")
            response.history = _skip_ignored_domains(response.history, ignored_domains)
        if print_result:
            print(f"\nPrinting response for {url}")
        for level, redirect in enumerate(response.history):
            logging.debug(f"Appending redirect {redirect.url} to output")
            output.append([level+1, redirect.url, redirect.status_code])
        output.append([len(output)+1, response.url, response.status_code])
        if print_result:
            logging.debug("Printing result(s)")
            for redirect in output:
                print(f"\nRedirect level:{redirect[0]} \nURL: {redirect[1]} \nHTTP Code: {redirect[2]}")
        logging.info(f"Exiting trace() and returning {output}") 
        return output
    else:  # If the request was not redirected
        if print_result:
            print("Request was not redirected")
        logging.info("Exiting trace() and returning ['Request was not redirected']") 
        return ["Request was not redirected"]


def _skip_ignored_domains(response_trace: list, ignored_domains: list) -> list:
    """Takes a list of responses and removes any responses that
    have domains that are in the ignored_domains variable

    Arguments
    ---------
    response_trace : list[responses]
        List of responses to strip domain results from

    Notes
    -----
    ignored_domains argument can include or not include a protocol

    Returns
    -------
    list[responses]:
        The stripped list of responses

    Examples
    --------
    Skip all domains with safelinks.protection.outlook.com or can01.safelinks.protection.outlook.com in the responses
    ```
    from sws.utilities.redirects import trace

    trace('kieranwood.ca', ["safelinks.protection.outlook.com", "can01.safelinks.protection.outlook.com"], print_result = True)
    ```
    """
    # Remove instances of ignored domains from the response trace
    for domain in ignored_domains:
        for response in response_trace:
            if domain in response.url:
                response_trace.remove(response)
            else:
                continue
    return response_trace

Functions

def trace(url: str, ignored_domains: Union[list, bool], print_result: bool = True) ‑> list

Trace all redirects associated with a URL.

Arguments

url : str
The URL to trace, can include or not include a protocol
ignored_domains : list[str] or bool
A list of domains (without protocols) to ignore in the trace; False can be passed in if no domains should be ignored
print_result : bool
If true then the value will be printed in a human readable format

Notes

url argument can include or not include a protocol

Raises

Valueerror

If url cannot be connected to a ValueError will be raised

Returns

list[responses]:
The list of traced responses

Examples

Trace the redirects from kieranwood.ca

from sws.redirects import trace

trace('kieranwood.ca', print_result = True) '''Prints:

Printing trace for http://kieranwood.ca

Redirect level:1
URL: http://kieranwood.ca/
HTTP Code: 301

Redirect level:2
URL: https://kieranwood.ca
HTTP Code: 200'''
Expand source code
def trace(url: str, ignored_domains: Union[list, bool], print_result: bool = True) -> list:
    """Trace all redirects associated with a URL.

    Arguments
    ---------
    url : str
        The URL to trace, can include or not include a protocol

    ignored_domains : list[str] or bool
        A list of domains (without protocols) to ignore in the trace; False
        can be passed in if no domains should be ignored

    print_result : bool
        If true then the value will be printed in a human readable format

    Notes
    -----
    url argument can include or not include a protocol

    Raises
    ------
    ValueError:
        If url cannot be connected to a ValueError will be raised

    Returns
    -------
    list[responses]:
        The list of traced responses

    Examples
    --------
    Trace the redirects from kieranwood.ca
    ```
    from sws.redirects import trace

    trace('kieranwood.ca', print_result = True) '''Prints:

    Printing trace for http://kieranwood.ca

    Redirect level:1
    URL: http://kieranwood.ca/
    HTTP Code: 301

    Redirect level:2
    URL: https://kieranwood.ca
    HTTP Code: 200'''
    ```
    """
    logging.info(f"Entering trace(url={url}, ignored_domains={ignored_domains}, print_result={print_result})")

    logging.info(f"Checking protocol is present on {url}")
    # Checks if protocols are present
    if "https://" in url:
        ...  # Continue
    elif "http://" in url:
        ...  # Continue
    else:  # Add a protocol to URL
        url = "http://" + url
        logging.info(f"Changed url to {url}")

    # Try going to the provided URL
    logging.info("Starting HTTP request")
    try:
        response = requests.get(url)

    except requests.exceptions.ConnectionError:
        if print_result:
            print(f"Could not connect to {url}, please ensure there are no spelling mistakes")
        raise ValueError(f"Could not connect to {url}, please ensure there are no spelling mistakes")
    except Exception as identifier:
        if print_result:
            print(f"Error while checking {url} \nError Code: {identifier}")
        return [f"Error while checking {url} \nError Code: {identifier}"]

    output = []  # The result of the response
    if response.history:  # If the request was redirected
        if ignored_domains:
            logging.debug("Skipping ignored domains")
            response.history = _skip_ignored_domains(response.history, ignored_domains)
        if print_result:
            print(f"\nPrinting response for {url}")
        for level, redirect in enumerate(response.history):
            logging.debug(f"Appending redirect {redirect.url} to output")
            output.append([level+1, redirect.url, redirect.status_code])
        output.append([len(output)+1, response.url, response.status_code])
        if print_result:
            logging.debug("Printing result(s)")
            for redirect in output:
                print(f"\nRedirect level:{redirect[0]} \nURL: {redirect[1]} \nHTTP Code: {redirect[2]}")
        logging.info(f"Exiting trace() and returning {output}") 
        return output
    else:  # If the request was not redirected
        if print_result:
            print("Request was not redirected")
        logging.info("Exiting trace() and returning ['Request was not redirected']") 
        return ["Request was not redirected"]