Spark SQL error: org.apache.spark.sql.catalyst.parser.ParseException: extraneous input '$' expecting

I am building a query in a StringBuilder like below:

println(dataQuery)

Execution started at 2019-10-31 02:58:24.006019 PST
res245: String =
" SELECT transaction_created_date, txn_mth, txn_mth_id, breakout_y_n, cast($counter as Int) AS arrival_days, cast(date_sub(date_add(transaction_created_date,$counter),day(transaction_created_date)) as String) as Arrival_date,trim(cast(getDayOfWeek(cast(date_sub(date_add(transaction_created_date,$counter),day(transaction_created_date)) as String)) as String)) as weekday,cast(ceil($counter/7)as Int) as week_no, sum(if(arrival_day_base<=$counter,gross,0)) as GROSS, sum(if(arrival_day_base<=$counter,nbc,0)) as NBC, sum(if(arrival_day_base<=$counter,nbr,0)) as NBR, sum(if(arrival_day_base<=$counter,dp,0)) as DP, sum(if(arrival_day_base==$counter,gross,0)) as DAYGROSS, sum(if(arrival_day_base==$counter,nbc,0)) as DAYNBC, sum(if(arrival_day_base==$counter,nbr,0)) as DAYNBR, , sum(if(arrival_day_base==$counter,dp,0)) as DAYDP,
FROM BASE_DLV
GROUP BY transaction_created_date, txn_mth, txn_mth_id, breakout_y_n, arrival_days, arrival_date, weekday, week_no

When executing it as SQL with val data3 = spark.sql(dataQuery), I get the error below:

org.apache.spark.sql.catalyst.parser.ParseException:
extraneous input '$' expecting {'SELECT', 'FROM', 'ADD', 'AS', 'ALL', 'DISTINCT', 'WHERE', 'GROUP', 'BY', 'GROUPING', 'SETS', 'CUBE', 'ROLLUP', 'ORDER', 'HAVING', 'LIMIT', 'AT', 'OR', 'AND', 'IN', NOT, 'NO', 'EXISTS', 'BETWEEN', 'LIKE', RLIKE, 'IS', 'NULL', 'TRUE', 'FALSE', 'NULLS', 'ASC', 'DESC', 'FOR', 'INTERVAL', 'CASE', 'WHEN', 'THEN', 'ELSE', 'END', 'JOIN', 'CROSS', 'OUTER', 'INNER', 'LEFT', 'SEMI', 'RIGHT', 'FULL', 'NATURAL', 'ON', 'LATERAL', 'WINDOW', 'OVER', 'PARTITION', 'RANGE', 'ROWS', 'UNBOUNDED', 'PRECEDING', 'FOLLOWING', 'CURRENT', 'FIRST', 'AFTER', 'LAST', 'ROW', 'WITH', 'VALUES', 'CREATE', 'TABLE', 'DIRECTORY', 'VIEW', 'REPLACE', 'INSERT', 'DELETE', 'INTO', 'DESCRIBE', 'EXPLAIN', 'FORMAT', 'LOGICAL', 'CODEGEN', 'COST', 'CAST', 'SHOW', 'TABLES', 'COLUMNS', 'COLUMN', 'USE', 'PARTITIONS', 'FUNCTIONS', 'DROP', 'UNION', 'EXCEPT', 'MINUS', 'INTERSECT', 'TO', 'TABLESAMPLE', 'STRATIFY', 'ALTER', 'RENAME', 'ARRAY', 'MAP', 'STRUCT', 'COMMENT', 'SET', 'RESET', 'DATA', 'START', 'TRANSACTION', 'COMMIT', 'ROLLBACK', 'MACRO', 'IGNORE', 'BOTH', 'LEADING', 'TRAILING', 'IF', 'POSITION', 'DIV', 'PERCENT', 'BUCKET', 'OUT', 'OF', 'SORT', 'CLUSTER', 'DISTRIBUTE', 'OVERWRITE', 'TRANSFORM', 'REDUCE', 'SERDE', 'SERDEPROPERTIES', 'RECORDREADER', 'RECORDWRITER', 'DELIMITED', 'FIELDS', 'TERMINATED', 'COLLECTION', 'ITEMS', 'KEYS', 'ESCAPED', 'LINES', 'SEPARATED', 'FUNCTION', 'EXTENDED', 'REFRESH', 'CLEAR', 'CACHE', 'UNCACHE', 'LAZY', 'FORMATTED', 'GLOBAL', TEMPORARY, 'OPTIONS', 'UNSET', 'TBLPROPERTIES', 'DBPROPERTIES', 'BUCKETS', 'SKEWED', 'STORED', 'DIRECTORIES', 'LOCATION', 'EXCHANGE', 'ARCHIVE', 'UNARCHIVE', 'FILEFORMAT', 'TOUCH', 'COMPACT', 'CONCATENATE', 'CHANGE', 'CASCADE', 'RESTRICT', 'CLUSTERED', 'SORTED', 'PURGE', 'INPUTFORMAT', 'OUTPUTFORMAT', DATABASE, DATABASES, 'DFS', 'TRUNCATE', 'ANALYZE', 'COMPUTE', 'LIST', 'STATISTICS', 'PARTITIONED', 'EXTERNAL', 'DEFINED', 'REVOKE', 'GRANT', 'LOCK', 'UNLOCK', 'MSCK', 'REPAIR', 'RECOVER', 'EXPORT', 'IMPORT', 'LOAD', 'ROLE', 'ROLES', 'COMPACTIONS', 'PRINCIPALS', 'TRANSACTIONS', 'INDEX', 'INDEXES', 'LOCKS', 'OPTION', 'ANTI', 'LOCAL', 'INPATH', IDENTIFIER, BACKQUOTED_IDENTIFIER}(line 1, pos 74)

== SQL ==
 SELECT transaction_created_date, txn_mth, txn_mth_id, breakout_y_n, cast($counter as Int) AS arrival_days, cast(date_sub(date_add(transaction_created_date,$counter),day(transaction_created_date)) as String) as Arrival_date,trim(cast(getDayOfWeek(cast(date_sub(date_add(transaction_created_date,$counter),day(transaction_created_date)) as String)) as String)) as weekday,cast(ceil($counter/7)as Int) as week_no, sum(if(arrival_day_base<=$counter,gross,0)) as GROSS, sum(if(arrival_day_base<=$counter,nbc,0)) as NBC, sum(if(arrival_day_base<=$counter,nbr,0)) as NBR, sum(if(arrival_day_base<=$counter,dp,0)) as DP, sum(if(arrival_day_base==$counter,gross,0)) as DAYGROSS, sum(if(arrival_day_base==$counter,nbc,0)) as DAYNBC, sum(if(arrival_day_base==$counter,nbr,0)) as DAYNBR, sum(if(arrival_day_base==$counter,dp,0)) as DAYDP
--------------------------------------------------------------------------^^^
FROM BASE_DLV
GROUP BY transaction_created_date, txn_mth, txn_mth_id, breakout_y_n, arrival_days, arrival_date, weekday, week_no

  at org.apache.spark.sql.catalyst.parser.ParseException.withCommand(ParseDriver.scala:239)
  at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parse(ParseDriver.scala:115)
  at org.apache.spark.sql.execution.SparkSqlParser.parse(SparkSqlParser.scala:48)
  at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parsePlan(ParseDriver.scala:69)
  at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:641)
  ... 71 elided

I also tried running the same query directly:

val data2 = spark.sql(s"""SELECT transaction_created_date, txn_mth, txn_mth_id, breakout_y_n,
cast($counter as Int) AS arrival_days,
cast(date_sub(date_add(transaction_created_date,$counter),day(transaction_created_date)) as String) as Arrival_date,
trim(cast(getDayOfWeek(cast(date_sub(date_add(transaction_created_date,$counter),day(transaction_created_date)) as String)) as String)) as weekday,
cast(ceil($counter/7) as Int) as week_no,
sum(if(arrival_day_base<=$counter,gross,0)) as GROSS,
sum(if(arrival_day_base<=$counter,nbc,0)) as NBC,
sum(if(arrival_day_base<=$counter,nbr,0)) as NBR,
sum(if(arrival_day_base<=$counter,dp,0)) as DP,
sum(if(arrival_day_base==$counter,gross,0)) as DAYGROSS,
sum(if(arrival_day_base==$counter,nbc,0)) as DAYNBC,
sum(if(arrival_day_base==$counter,nbr,0)) as DAYNBR,
sum(if(arrival_day_base==$counter,dp,0)) as DAYDP
FROM BASE_DLV
GROUP BY transaction_created_date, txn_mth, txn_mth_id, breakout_y_n, arrival_days, arrival_date, weekday, week_no""")

and it executes successfully:

Execution started at 2019-10-31 02:51:32.451289 PST
data2: org.apache.spark.sql.DataFrame = [transaction_created_date: string, txn_mth: string ... 14 more fields]
Execution completed at 2019-10-31 02:51:34.532190 PST in 2.08 s

but I get the same parse error when trying

val data3 = spark.sql(s"""$dataQuery""")

Can anyone please help with using the StringBuilder-built query in spark.sql() without running into this issue?



Solution 1:[1]

dataQuery should be built with counter already defined and evaluated, so the value is interpolated into the string before it is passed to spark.sql; otherwise the literal text $counter stays in the query, which is exactly what the parser rejects.

val counter = 10
val dataQuery = s"select $counter as cnt" //gives select 10 as cnt

spark.sql(s"$dataQuery").show()

shows

+---+
|cnt|
+---+
| 10|
+---+
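
The same idea works with the original StringBuilder build: every fragment that mentions counter must be an s-interpolated string, so the value is substituted while the query text is assembled, and only the finished text is handed to spark.sql. A trimmed-down sketch (assuming counter is defined and BASE_DLV is already registered as a table or view; only a few of the original columns are kept for brevity):

val counter = 10
val sb = new StringBuilder
// the s prefix makes Scala replace $counter with its value at append time
sb.append("SELECT transaction_created_date, txn_mth, txn_mth_id, breakout_y_n, ")
sb.append(s"cast($counter as Int) AS arrival_days, ")
sb.append(s"sum(if(arrival_day_base<=$counter,gross,0)) as GROSS ")
sb.append("FROM BASE_DLV ")
sb.append("GROUP BY transaction_created_date, txn_mth, txn_mth_id, breakout_y_n")

val dataQuery = sb.toString   // contains the literal 10, no '$' left for the parser
val data3 = spark.sql(dataQuery)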

Solution 2:[2]

I think what you are noticing is that in Scala, multi-line SQL statements need to be wrapped in triple quotes (""").
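
For example, a minimal sketch (assuming counter is defined and BASE_DLV is a registered table or view):

val counter = 10
// triple quotes let the statement span lines; the s prefix still interpolates $counter
val data = spark.sql(s"""SELECT transaction_created_date,
  cast($counter as Int) AS arrival_days
FROM BASE_DLV""")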

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 arbuzov
Solution 2 GenericDisplayName