o
    Zhp                  
   @   s  d dl Z d dlZd dlZd dlZd dlmZ d dlm	Z	 d dlm
Z
mZ zd dlmZ W n ey7   dZY nw ejjZd;ddZdd Zejd	d
dgdd Zejdedddfdd Zdd Zejd	d
dgdd Zdd Zdd Zejd	d
dgdd Zdd Zd d! Zejd	d
dgd"d# Zd$d% Zejd	d
dgd&d' Z d(d) Z!d*d+ Z"ejd,e#e$d-d.e#e$d-e$d/e$d-d0kgd1d2 Z%d3d4 Z&d5d6 Z'd7d8 Z(d9d: Z)dS )<    N)tobytes)ArrowInvalidArrowNotImplementedError
   c                 C   s   ddl m} |t | S )Nr   )_get_udf_context)Zpyarrow._computer   paZdefault_memory_pool)Zbatch_lengthr    r   S/var/www/html/lang_env/lib/python3.10/site-packages/pyarrow/tests/test_substrait.pymock_udf_context&   s   r
   c                 C   sT   t jt| |}tjj||jd}|| W d    |S 1 s#w   Y  |S )N)schema)	ospathjoinstrr   ZipcZRecordBatchFileWriterr   Zwrite_table)tmpdir	file_nametabler   writerr   r   r	   _write_dummy_data_to_disk+   s   
r   use_threadsTFc           
      C   s   d}d}t jg dgdgd}t| ||}t|dt| }t j	|}t
j||d}| }	|dg|	dgksAJ d S )Nax  
    {
        "version": { "major": 9999 },
        "relations": [
        {"rel": {
            "read": {
            "base_schema": {
                "struct": {
                "types": [
                            {"i64": {}}
                        ]
                },
                "names": [
                        "foo"
                        ]
            },
            "local_files": {
                "items": [
                {
                    "uri_file": "FILENAME_PLACEHOLDER",
                    "arrow": {}
                }
                ]
            }
            }
        }}
        ]
    }
    zread_data.arrow               foonamesFILENAME_PLACEHOLDERr   r   r   r   r   replacepathlibPathas_uri
_substrait_parse_json_plan	substrait	run_queryread_allselect
r   r   substrait_queryr   r   r   querybufreaderres_tbr   r   r	   test_run_serialized_query2   s    r2   r.   s   buffers   bytesr   c                 C   s   t |tjtfs/dt| d}tjt|d t	| W d    d S 1 s(w   Y  d S d}tjt
|d t	| W d    d S 1 sIw   Y  d S )Nz)Expected 'pyarrow.Buffer' or bytes, got ''matchz1ParseFromZeroCopyStream failed for substrait.Plan)
isinstancer   Bufferbytestypepytestraises	TypeErrorr(   r)   OSError)r   r.   msgr   r   r	   test_run_query_input_types`   s   
"r?   c                  C   sV   d} t jt| }d}tjt|d t| W d    d S 1 s$w   Y  d S )Nz2
    {
        "relations": [
        ]
    }
    zPlan has no relationsr4   )	r   r&   r'   r   r:   r;   r   r(   r)   )r.   r/   exec_messager   r   r	   test_invalid_planp   s   "rA   c           
      C   s   d}d}t jg dgdgd}t| ||}t|dt| }t j	t|}t
j||d}| }	|dg|	dgksCJ d S )Na  
    {
        "version": { "major": 9999 },
        "relations": [
        {"rel": {
            "read": {
            "base_schema": {
                "struct": {
                "types": [
                            {"i64": {}}
                        ]
                },
                "names": [
                        "bar"
                        ]
            },
            "local_files": {
                "items": [
                {
                    "uri_file": "FILENAME_PLACEHOLDER",
                    "arrow": {},
                    "metadata" : {
                      "created_by" : {},
                    }
                }
                ]
            }
            }
        }}
        ]
    }
    zbinary_json_data.arrowr   barr   r   r    r!   r,   r   r   r	   (test_binary_conversion_with_json_options}   s   ! rC   c                 C   s,   | d| }| D ]
}| |r dS q	dS )N#TF)endswith)ZfnsZext_filefn_namesuffixfnr   r   r	   has_function   s   
rI   c                  C   s.   t j } t| ddsJ t| ddsJ d S )Nzfunctions_arithmetic.yamladdsum)r   r&   Zget_supported_functionsrI   )Zsupported_functionsr   r   r	   test_get_supported_functions   s   
rL   c                    s   t jdg dit jdg dit t dt  g  fdd}d}t jt|}t j	j
||| d}| }|ksEJ d S )Nxr   r   r   )r   r      c                    s@   | st d| d dkr| ksJ S | d dkrS t d)NNo names providedr   t1r   t2Unrecognized table name	Exceptionr   r   Zschema_1test_table_1Ztest_table_2r   r	   table_provider   s   z(test_named_table.<locals>.table_providera  
    {
        "version": { "major": 9999 },
        "relations": [
        {"rel": {
            "read": {
            "base_schema": {
                "struct": {
                "types": [
                            {"i64": {}}
                        ]
                },
                "names": [
                        "x"
                        ]
            },
            "namedTable": {
                    "names": ["t1"]
            }
            }
        }}
        ]
    }
    rY   r   )r   Tablefrom_pydictr   fieldZint64r&   r'   r   r(   r)   r*   )r   rY   r-   r/   r0   r1   r   rW   r	   test_named_table   s   r^   c                     sz   t jdg di  fdd} d}t jt|}d}tjt|d t	j
|| d W d    d S 1 s6w   Y  d S )	NrM   rN   c                    $   | st d| d dkr S t dNrP   r   rQ   rS   rT   r   _rX   r   r	   rY      
   z;test_named_table_invalid_table_name.<locals>.table_providera  
    {
        "version": { "major": 9999 },
        "relations": [
        {"rel": {
            "read": {
            "base_schema": {
                "struct": {
                "types": [
                            {"i64": {}}
                        ]
                },
                "names": [
                        "x"
                        ]
            },
            "namedTable": {
                    "names": ["t3"]
            }
            }
        }}
        ]
    }
    zInvalid NamedTable Sourcer4   rY   )r   r[   r\   r&   r'   r   r:   r;   r   r(   r)   )rY   r-   r/   r@   r   rc   r	   #test_named_table_invalid_table_name   s   "rf   c                     s   t jdg di  fdd} d}t|}t jt|}d}tjt|d t	j
|| d W d    d S 1 s:w   Y  d S )	NrM   rN   c                    r_   r`   rT   ra   rc   r   r	   rY     rd   z4test_named_table_empty_names.<locals>.table_providera  
    {
        "version": { "major": 9999 },
        "relations": [
        {"rel": {
            "read": {
            "base_schema": {
                "struct": {
                "types": [
                            {"i64": {}}
                        ]
                },
                "names": [
                        "x"
                        ]
            },
            "namedTable": {
                    "names": []
            }
            }
        }}
        ]
    }
    z!names for NamedTable not providedr4   re   )r   r[   r\   r   r&   r'   r:   r;   r   r(   r)   )rY   r-   r.   r/   r@   r   rc   r	   test_named_table_empty_names  s   "rg   c           
         s~   t jdg di  fdd}d}t j|}t jj|||d}| }| \}} dd|t	d	 d }	||	ks=J d S )
NrM   rN   c                    r_   r`   rT   ra   Z
test_tabler   r	   rY   J  rd   z.test_udf_via_substrait.<locals>.table_providers	  
    {
  "extensionUris": [
    {
      "extensionUriAnchor": 1
    },
    {
      "extensionUriAnchor": 2,
      "uri": "urn:arrow:substrait_simple_extension_function"
    }
  ],
  "extensions": [
    {
      "extensionFunction": {
        "extensionUriReference": 2,
        "functionAnchor": 1,
        "name": "y=x+1"
      }
    }
  ],
  "relations": [
    {
      "root": {
        "input": {
          "project": {
            "common": {
              "emit": {
                "outputMapping": [
                  1,
                  2,
                ]
              }
            },
            "input": {
              "read": {
                "baseSchema": {
                  "names": [
                    "t",
                  ],
                  "struct": {
                    "types": [
                      {
                        "i64": {
                          "nullability": "NULLABILITY_REQUIRED"
                        }
                      },
                    ],
                    "nullability": "NULLABILITY_REQUIRED"
                  }
                },
                "namedTable": {
                  "names": [
                    "t1"
                  ]
                }
              }
            },
            "expressions": [
              {
                "selection": {
                  "directReference": {
                    "structField": {}
                  },
                  "rootReference": {}
                }
              },
              {
                "scalarFunction": {
                  "functionReference": 1,
                  "outputType": {
                    "i64": {
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  },
                  "arguments": [
                    {
                      "value": {
                        "selection": {
                          "directReference": {
                            "structField": {}
                          },
                          "rootReference": {}
                        }
                      }
                    }
                  ]
                }
              }
            ]
          }
        },
        "names": [
          "x",
          "y",
        ]
      }
    }
  ]
}
    rZ   r   yr   )
r   r[   r\   r&   r'   r(   r)   r*   
add_columnr
   )
Zunary_func_fixturer   rY   r-   r/   r0   r1   functionnameexpected_tbr   rh   r	   test_udf_via_substraitF  s   e
rn   c                     s   t jdg di  fdd} d}t j|}tt j}t jj	|| d W d    n1 s1w   Y  dt
|jv s?J d S )NrM   rN   c                    r_   r`   rT   ra   rh   r   r	   rY     rd   z=test_udf_via_substrait_wrong_udf_name.<locals>.table_providers	  
    {
  "extensionUris": [
    {
      "extensionUriAnchor": 1
    },
    {
      "extensionUriAnchor": 2,
      "uri": "urn:arrow:substrait_simple_extension_function"
    }
  ],
  "extensions": [
    {
      "extensionFunction": {
        "extensionUriReference": 2,
        "functionAnchor": 1,
        "name": "wrong_udf_name"
      }
    }
  ],
  "relations": [
    {
      "root": {
        "input": {
          "project": {
            "common": {
              "emit": {
                "outputMapping": [
                  1,
                  2,
                ]
              }
            },
            "input": {
              "read": {
                "baseSchema": {
                  "names": [
                    "t",
                  ],
                  "struct": {
                    "types": [
                      {
                        "i64": {
                          "nullability": "NULLABILITY_REQUIRED"
                        }
                      },
                    ],
                    "nullability": "NULLABILITY_REQUIRED"
                  }
                },
                "namedTable": {
                  "names": [
                    "t1"
                  ]
                }
              }
            },
            "expressions": [
              {
                "selection": {
                  "directReference": {
                    "structField": {}
                  },
                  "rootReference": {}
                }
              },
              {
                "scalarFunction": {
                  "functionReference": 1,
                  "outputType": {
                    "i64": {
                      "nullability": "NULLABILITY_NULLABLE"
                    }
                  },
                  "arguments": [
                    {
                      "value": {
                        "selection": {
                          "directReference": {
                            "structField": {}
                          },
                          "rootReference": {}
                        }
                      }
                    }
                  ]
                }
              }
            ]
          }
        },
        "names": [
          "x",
          "y",
        ]
      }
    }
  ]
}
    re   zNo function registered)r   r[   r\   r&   r'   r:   r;   ZArrowKeyErrorr(   r)   r   value)rY   r-   r/   excinfor   rh   r	   %test_udf_via_substrait_wrong_udf_name  s   erq   c                    sr   t jdg di  fdd}d}t jt|}t jj||| d}| }t jdg di}||ks7J d S )NrM   rN   c                        S Nr   rV   Zin_tabler   r	   rY   <     z/test_output_field_names.<locals>.table_providera  
    {
      "version": { "major": 9999 },
      "relations": [
        {
          "root": {
            "input": {
              "read": {
                "base_schema": {
                  "struct": {
                    "types": [{"i64": {}}]
                  },
                  "names": ["x"]
                },
                "namedTable": {
                  "names": ["t1"]
                }
              }
            },
            "names": ["out"]
          }
        }
      ]
    }
    rZ   out)	r   r[   r\   r&   r'   r   r(   r)   r*   )r   rY   r-   r/   r0   r1   expectedr   rt   r	   test_output_field_names8  s   rx   c                    s   t jg dg dg dd  fdd}d}t j|}t jj||dd	}| }t jd
dgddgd}||ks>J d S )Nr   r   r   r   )r   r   r   r   )      ?rz   rz   rz   )kv1v2c                    rr   rs   r   ra   rh   r   r	   rY   j  ru   z7test_scalar_aggregate_udf_basic.<locals>.table_providers  
{
  "extensionUris": [
    {
      "extensionUriAnchor": 1,
      "uri": "urn:arrow:substrait_simple_extension_function"
    },
  ],
  "extensions": [
    {
      "extensionFunction": {
        "extensionUriReference": 1,
        "functionAnchor": 1,
        "name": "sum_mean"
      }
    }
  ],
  "relations": [
    {
      "root": {
        "input": {
          "extensionSingle": {
            "common": {
              "emit": {
                "outputMapping": [
                  0,
                  1
                ]
              }
            },
            "input": {
              "read": {
                "baseSchema": {
                  "names": [
                    "k",
                    "v1",
                    "v2",
                  ],
                  "struct": {
                    "types": [
                      {
                        "i64": {
                          "nullability": "NULLABILITY_REQUIRED"
                        }
                      },
                      {
                        "i64": {
                          "nullability": "NULLABILITY_NULLABLE"
                        }
                      },
                      {
                        "fp64": {
                          "nullability": "NULLABILITY_NULLABLE"
                        }
                      }
                    ],
                    "nullability": "NULLABILITY_REQUIRED"
                  }
                },
                "namedTable": {
                  "names": ["t1"]
                }
              }
            },
            "detail": {
              "@type": "/arrow.substrait_ext.SegmentedAggregateRel",
              "segmentKeys": [
                {
                  "directReference": {
                    "structField": {}
                  },
                  "rootReference": {}
                }
              ],
              "measures": [
                {
                  "measure": {
                    "functionReference": 1,
                    "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT",
                    "outputType": {
                      "fp64": {
                        "nullability": "NULLABILITY_NULLABLE"
                      }
                    },
                    "arguments": [
                      {
                        "value": {
                          "selection": {
                            "directReference": {
                              "structField": {
                                "field": 1
                              }
                            },
                            "rootReference": {}
                          }
                        }
                      },
                      {
                        "value": {
                          "selection": {
                            "directReference": {
                              "structField": {
                                "field": 2
                              }
                            },
                            "rootReference": {}
                          }
                        }
                      }
                    ]
                  }
                }
              ]
            }
          }
        },
        "names": [
          "k",
          "v_avg"
        ]
      }
    }
  ],
}
FrZ   r   r   g      @g      @)r{   v_avgr   r[   r\   r&   r'   r(   r)   r*   Zvarargs_agg_func_fixturerY   r-   r/   r0   r1   rm   r   rh   r	   test_scalar_aggregate_udf_basicc  s"   }r   c                    s   t jg dg dg dg dd  fdd}d}t j|}t jj||d	d
}| }t jg dg dg dd}||ksDJ d S )N)r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   )r   r   r   r   r   rO         )rz   rz   rz   rz   g       @g      @g      @g      @)tr{   r|   r}   c                    rr   rs   r   ra   rh   r   r	   rY      ru   z5test_hash_aggregate_udf_basic.<locals>.table_providers  
{
  "extensionUris": [
    {
      "extensionUriAnchor": 1,
      "uri": "urn:arrow:substrait_simple_extension_function"
    },
  ],
  "extensions": [
    {
      "extensionFunction": {
        "extensionUriReference": 1,
        "functionAnchor": 1,
        "name": "sum_mean"
      }
    }
  ],
  "relations": [
    {
      "root": {
        "input": {
          "extensionSingle": {
            "common": {
              "emit": {
                "outputMapping": [
                  0,
                  1,
                  2
                ]
              }
            },
            "input": {
              "read": {
                "baseSchema": {
                  "names": [
                    "t",
                    "k",
                    "v1",
                    "v2",
                  ],
                  "struct": {
                    "types": [
                      {
                        "i64": {
                          "nullability": "NULLABILITY_REQUIRED"
                        }
                      },
                      {
                        "i64": {
                          "nullability": "NULLABILITY_REQUIRED"
                        }
                      },
                      {
                        "i64": {
                          "nullability": "NULLABILITY_NULLABLE"
                        }
                      },
                      {
                        "fp64": {
                          "nullability": "NULLABILITY_NULLABLE"
                        }
                      }
                    ],
                    "nullability": "NULLABILITY_REQUIRED"
                  }
                },
                "namedTable": {
                  "names": ["t1"]
                }
              }
            },
            "detail": {
              "@type": "/arrow.substrait_ext.SegmentedAggregateRel",
              "groupingKeys": [
                {
                  "directReference": {
                    "structField": {
                      "field": 1
                    }
                  },
                  "rootReference": {}
                }
              ],
              "segmentKeys": [
                {
                  "directReference": {
                    "structField": {}
                  },
                  "rootReference": {}
                }
              ],
              "measures": [
                {
                  "measure": {
                    "functionReference": 1,
                    "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT",
                    "outputType": {
                      "fp64": {
                        "nullability": "NULLABILITY_NULLABLE"
                      }
                    },
                    "arguments": [
                      {
                        "value": {
                          "selection": {
                            "directReference": {
                              "structField": {
                                "field": 2
                              }
                            },
                            "rootReference": {}
                          }
                        }
                      },
                      {
                        "value": {
                          "selection": {
                            "directReference": {
                              "structField": {
                                "field": 3
                              }
                            },
                            "rootReference": {}
                          }
                        }
                      }
                    ]
                  }
                }
              ]
            }
          }
        },
        "names": [
          "t",
          "k",
          "v_avg"
        ]
      }
    }
  ],
}
FrZ   ry   )r   r   r   r   )      @r   g      "@g      &@)r   r{   r~   r   r   r   rh   r	   test_hash_aggregate_udf_basic  s*    r   exprrM   r   ri   2   c                 C   sx   t t dt  t dt  g}t j| gdg|}t j|}||jks*J t|jdks3J d|jv s:J d S )NrM   ri   Z	test_exprr   )	r   r   r]   int32r(   serialize_expressionsdeserialize_expressionslenexpressions)r   r   r/   returnedr   r   r	   test_serializing_expressions  s   r   c                  C   s>  t t dt  t dt  g} ttdd}ttdd}tt}t j	
|gg |  W d    n1 s>w   Y  dt|jv sLJ tt}t j	
|gddg|  W d    n1 sgw   Y  dt|jv suJ tt}t j	
|gdg|  W d    n1 sw   Y  d	t|jv sJ d S )
NrM   ri   r   zzneed to have the same lengthr   rB   r   zNo match for FieldRef)r   r   r]   r   pcequalr:   r;   
ValueErrorr(   r   r   ro   )r   r   Zbad_exprrp   r   r   r	   test_invalid_expression_ser_des  s$   r   c                  C   s   t t dt  t dt  g} ttddttdtdg}t j|ddg| }t j|}| |jks?J t	|j
dksHJ ttddttdtdg}t|j
d t|d kslJ t|j
d t|d ks{J d S )	NrM   ri   r   firstsecondr   r   r   )r   r   r]   r   r   r   r(   r   r   r   r   r   )r   exprsr/   r   Z
norm_exprsr   r   r	   %test_serializing_multiple_expressions  s   **"r   c                  C   s2  t t dt  t dt  g} ttdd}ttdd}|| }t j|}| |jks7J t	|j
dks@J t|j
d t|ksMJ t j||gddg| }tt}tj| W d    n1 sow   Y  d	t|jv s}J t j|gd
g| }tj|}t|t|ksJ d S )NrM   ri   r   r   r   
expressionr   r   zcontained multiple expressionsZ	weirdname)r   r   r]   r   r   r   Zto_substraitr(   r   r   r   r   r   r:   r;   r   
ExpressionZfrom_substraitro   )r   r   Z	expr_normr/   r   rp   Zexpr2r   r   r	   test_serializing_with_compute  s&   
r   c                  C   s   t t dt  g} td}td}t||g}tt	 t j
|dg|  W d    n1 s6w   Y  t j
j|dg| dd}t j
|}| |jksSJ t|jdks\J t|jd t|d kskJ d S )	NrM   r   r   r   T)Zallow_arrow_extensionsr   r   )r   r   r]   Zuint32r   ZscalarZ
shift_leftr:   r;   r   r(   r   r   r   r   r   )r   abr   r/   r   r   r   r	   test_serializing_udfs  s    


"r   )r   )*r   r#   r:   Zpyarrowr   Zpyarrow.computeZcomputer   Zpyarrow.libr   r   r   Zpyarrow.substraitr(   ImportErrormarkZ
pytestmarkr
   r   Zparametrizer2   Z	py_bufferr?   rA   rC   rI   rL   r^   rf   rg   rn   rq   rx   r   r   r   r]   r   r   r   r   r   r   r   r   r	   <module>   s^   

-

1

0**
{v
*  +
