Skip to content

Google Gemini

This module helps generate text using the Gemini API.

GoogleClient

Bases: BatchClient

Source code in dactyl_generation/google_generation.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
class GoogleClient(BatchClient):
    """Batch client that submits prompts to, and collects results from, the Gemini API."""

    def __init__(self, api_key: str):
        """
        Constructor for Google's Gemini API.

        Args:
            api_key: Gemini API key.
        """
        super().__init__()
        self.client = genai.Client(api_key=api_key)

    def upload_prompts(self, jsonl_path: str, display_name:str) -> genai.types.File:
        """
        Uploads JSONL file to Google Cloud.

        Args:
            jsonl_path: Local path to JSONL file containing prompts.
            display_name: Name of file on Google cloud to upload to.

        Returns:
            uploaded_file: Uploaded file object
        """
        upload_config = types.UploadFileConfig(display_name=display_name, mime_type="jsonl")
        return self.client.files.upload(file=jsonl_path, config=upload_config)

    def create_batch_job(self, jsonl_path: str, jsonl_display_name: str, model: str, batch_display_name: str) -> dict:
        """
        Creates and starts batch job with the Gemini API.

        Args:
            jsonl_path: Local path to JSONL file containing prompts.
            jsonl_display_name: Name of file on Google cloud to upload to.
            model: Name of LLM to use.
            batch_display_name: Batch display name to show.

        Returns:
            batch_info: Dictionary containing the batch job name, the prompts
                read from `jsonl_path` (as a list of records) and the API identifier.
        """
        uploaded_file = self.upload_prompts(jsonl_path, jsonl_display_name)
        # The prompts are kept alongside the job name so that results can later
        # be merged back onto them in `get_batch_job_output`.
        prompts_df = pd.read_json(jsonl_path, lines=True)
        batch_job = self.client.batches.create(
            model=model,
            src=uploaded_file.name,
            config={DISPLAY_NAME: batch_display_name},
        )
        return {
            BATCH: batch_job.name,
            INPUT_FILE: prompts_df.to_dict(orient="records"),
            API_CALL: GEMINI,
        }

    def get_batch_job_output(self, file_path: str) -> pd.DataFrame:
        """
        Fetches batch inference results and returns as pandas DataFrame.

        Args:
            file_path: JSON file containing the object returned by `create_batch_job`.

        Returns:
            dataframe: Results containing responses and prompts. One row per
                result line, left-merged with the stored prompts on the request
                key. Rows whose result line carries no usable response text get
                a null text value instead of aborting the whole load. An empty
                DataFrame is returned when the job has not succeeded, has no
                destination file, or produced no result lines.
        """
        with open(file_path, 'r', encoding='utf-8') as f:
            batch_info = json.load(f)
        batch_job = self.client.batches.get(name=batch_info[BATCH])
        prompts_df = pd.DataFrame(batch_info[INPUT_FILE])

        if batch_job.state.name != 'JOB_STATE_SUCCEEDED':
            return pd.DataFrame()
        # Only file-based batch jobs are handled here.
        if not (batch_job.dest and batch_job.dest.file_name):
            return pd.DataFrame()

        file_content = self.client.files.download(file=batch_job.dest.file_name)
        normalized_rows = []
        for line in file_content.decode('utf-8').splitlines():
            if not line.strip():
                continue
            response = json.loads(line.strip())
            row = {KEY: response[KEY]}
            # A result line may lack a response object (e.g. a failed request).
            obj = response.get(RESPONSE)
            if not isinstance(obj, dict):
                obj = {}
            try:
                row[TEXT] = obj[CANDIDATES][0][CONTENT][PARTS][0][TEXT]
            except (KeyError, IndexError, TypeError):
                # No candidate text available for this request.
                row[TEXT] = None
            # Keep every other response-level field as its own column.
            row.update({field: value for field, value in obj.items() if field != CANDIDATES})
            normalized_rows.append(row)

        # Without rows there is no key column to merge on.
        if not normalized_rows:
            return pd.DataFrame()
        results = pd.DataFrame(normalized_rows)
        return results.merge(prompts_df, how="left", on=KEY)

__init__(api_key)

Constructor for Google's Gemini API.

Parameters:

Name Type Description Default
api_key str

Gemini API key.

required
Source code in dactyl_generation/google_generation.py
14
15
16
17
18
19
20
21
22
def __init__(self, api_key: str):
    """
    Set up the client used to talk to Google's Gemini API.

    Args:
        api_key: Gemini API key.
    """
    super().__init__()
    gemini_client = genai.Client(api_key=api_key)
    self.client = gemini_client

create_batch_job(jsonl_path, jsonl_display_name, model, batch_display_name)

Creates and starts batch job with the Gemini API.

Parameters:

Name Type Description Default
jsonl_path str

Local path to JSONL file containing prompts.

required
jsonl_display_name str

Name of file on Google cloud to upload to.

required
model str

Name of LLM to use.

required
batch_display_name str

Batch display name to show.

required

Returns:

Name Type Description
batch_info dict

Dictionary containing batch information.

Source code in dactyl_generation/google_generation.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
def create_batch_job(self, jsonl_path: str, jsonl_display_name: str, model: str, batch_display_name: str) -> dict:
    """
    Upload the prompts file and start a Gemini batch job over it.

    Args:
        jsonl_path: Local path to JSONL file containing prompts.
        jsonl_display_name: Name of file on Google cloud to upload to.
        model: Name of LLM to use.
        batch_display_name: Batch display name to show.

    Returns:
        batch_info: Dictionary containing the batch job name, the prompts
            read from `jsonl_path` (as a list of records) and the API identifier.
    """
    remote_file = self.upload_prompts(jsonl_path, jsonl_display_name)
    # Prompts are stored with the job name so results can be joined back later.
    prompt_frame = pd.read_json(jsonl_path, lines=True)
    job_config = {DISPLAY_NAME: batch_display_name}
    job = self.client.batches.create(model=model, src=remote_file.name, config=job_config)

    batch_info = {}
    batch_info[BATCH] = job.name
    batch_info[INPUT_FILE] = prompt_frame.to_dict(orient="records")
    batch_info[API_CALL] = GEMINI
    return batch_info

get_batch_job_output(file_path)

Fetches batch inference results and returns as pandas DataFrame.

Parameters:

Name Type Description Default
file_path str

JSON file containing the object returned by the create_batch_job function.

required

Returns:

Name Type Description
dataframe DataFrame

Results containing responses and prompts.

Source code in dactyl_generation/google_generation.py
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
def get_batch_job_output(self, file_path: str) -> pd.DataFrame:
    """
    Fetches batch inference results and returns as pandas DataFrame.

    Args:
        file_path: JSON file containing the object returned by `create_batch_job`.

    Returns:
        dataframe: Results containing responses and prompts. One row per
            result line, left-merged with the stored prompts on the request
            key. Rows whose result line carries no usable response text get
            a null text value instead of aborting the whole load. An empty
            DataFrame is returned when the job has not succeeded, has no
            destination file, or produced no result lines.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        batch_info = json.load(f)
    batch_job = self.client.batches.get(name=batch_info[BATCH])
    prompts_df = pd.DataFrame(batch_info[INPUT_FILE])

    if batch_job.state.name != 'JOB_STATE_SUCCEEDED':
        return pd.DataFrame()
    # Only file-based batch jobs are handled here.
    if not (batch_job.dest and batch_job.dest.file_name):
        return pd.DataFrame()

    file_content = self.client.files.download(file=batch_job.dest.file_name)
    normalized_rows = []
    for line in file_content.decode('utf-8').splitlines():
        if not line.strip():
            continue
        response = json.loads(line.strip())
        row = {KEY: response[KEY]}
        # A result line may lack a response object (e.g. a failed request).
        obj = response.get(RESPONSE)
        if not isinstance(obj, dict):
            obj = {}
        try:
            row[TEXT] = obj[CANDIDATES][0][CONTENT][PARTS][0][TEXT]
        except (KeyError, IndexError, TypeError):
            # No candidate text available for this request.
            row[TEXT] = None
        # Keep every other response-level field as its own column.
        row.update({field: value for field, value in obj.items() if field != CANDIDATES})
        normalized_rows.append(row)

    # Without rows there is no key column to merge on.
    if not normalized_rows:
        return pd.DataFrame()
    results = pd.DataFrame(normalized_rows)
    return results.merge(prompts_df, how="left", on=KEY)

upload_prompts(jsonl_path, display_name)

Uploads JSONL file to Google Cloud.

Parameters:

Name Type Description Default
jsonl_path str

Local path to JSONL file containing prompts.

required
display_name str

Name of file on Google cloud to upload to.

required

Returns:

Name Type Description
uploaded_file File

Uploaded file object

Source code in dactyl_generation/google_generation.py
24
25
26
27
28
29
30
31
32
33
34
35
def upload_prompts(self, jsonl_path: str, display_name:str) -> genai.types.File:
    """
    Send a local JSONL prompts file to Google Cloud.

    Args:
        jsonl_path: Local path to JSONL file containing prompts.
        display_name: Name of file on Google cloud to upload to.

    Returns:
        uploaded_file: Uploaded file object
    """
    upload_config = types.UploadFileConfig(display_name=display_name, mime_type="jsonl")
    uploaded_file = self.client.files.upload(file=jsonl_path, config=upload_config)
    return uploaded_file